xref: /kvmtool/virtio/net.c (revision e59679d2a19c5f25a973086ccd356328070e6009)
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/iovec.h"

#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>

#define VIRTIO_NET_QUEUE_SIZE		256
#define VIRTIO_NET_NUM_QUEUES		8

struct net_dev;

struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
};

struct net_dev {
	struct mutex			mutex;
	struct virtio_device		vdev;
	struct list_head		list;

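	/*
	 * Virtqueue layout: for each of the queue_pairs queue pairs, the
	 * even-numbered vq is RX and the odd-numbered vq is TX; the last
	 * vq (index queue_pairs * 2) is the control queue.
	 */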
	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	u32				features, rx_vqs, tx_vqs, queue_pairs;

	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];

	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];
	bool				tap_ufo;

	int				mode;

	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

#define MAX_PACKET_SIZE 65550

static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
{
	return ndev->features & (1 << feature);
}

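/*
 * The virtio_net_hdr is shared with the guest, so its multi-byte fields
 * must be converted between guest and host byte order on each side of a
 * transfer.
 */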
static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
{
	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
}

static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
{
	hdr->hdr_len		= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len);
	hdr->gso_size		= virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size);
	hdr->csum_start		= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start);
	hdr->csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset);
}

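/*
 * The RX thread pulls one packet at a time from the backend (TAP or uip)
 * into a local bounce buffer, then copies it into as many guest buffers
 * as needed, which can be more than one when VIRTIO_NET_F_MRG_RXBUF has
 * been negotiated. It sleeps on the queue's condition variable until the
 * guest makes buffers available.
 */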
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len, copied;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->rx_vqs++ * 2;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			u16 num_buffers;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
						__func__, id, len);
				goto out_err;
			}

			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);

			virt_queue__used_idx_advance(vq, num_buffers);

			/* Signal the guest right away; delaying the interrupt hurts latency badly. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

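/*
 * The TX thread pops buffers off its queue as the guest posts them,
 * fixes up the virtio_net_hdr byte order and hands the whole iovec to
 * the backend's tx op (writev() on the tap fd, or uip_tx() in user mode).
 */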
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->tx_vqs++ * 2 + 1;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			struct virtio_net_hdr *hdr;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			virtio_net_fix_tx_hdr(hdr, ndev);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)\n",
						__func__, id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

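/*
 * Stub handler for VIRTIO_NET_CTRL_MQ commands: the per-queue threads
 * are created as the guest initializes each virtqueue, so the command
 * only needs to be acknowledged here.
 */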
static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

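/*
 * The control queue thread handles guest requests on the control
 * virtqueue (the last vq): the command header arrives in the first
 * descriptor and the ack status byte is written to the last one.
 */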
static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	u16 out, in, head;
	struct net_dev *ndev = p;
	struct kvm *kvm = ndev->kvm;
	u32 id = ndev->queue_pairs * 2;
	struct virt_queue *vq = &ndev->vqs[id];
	struct virtio_net_ctrl_hdr *ctrl;
	virtio_net_ctrl_ack *ack;

	kvm__set_thread_name("virtio-net-ctrl");

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			ctrl = iov[0].iov_base;
			ack = iov[out].iov_base;

			switch (ctrl->class) {
			case VIRTIO_NET_CTRL_MQ:
				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
				break;
			default:
				*ack = VIRTIO_NET_ERR;
				break;
			}
			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

	pthread_exit(NULL);
	return NULL;
}

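/*
 * Called when the guest kicks a virtqueue: wake up the IO thread that
 * services that queue.
 */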
static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	mutex_lock(&ndev->io_lock[queue]);
	pthread_cond_signal(&ndev->io_cond[queue]);
	mutex_unlock(&ndev->io_lock[queue]);
}

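/*
 * Attach to a tap interface via TUNSETIFF. IFF_VNET_HDR makes the kernel
 * prepend a virtio_net_hdr to every packet, and IFF_NO_PI suppresses the
 * extra packet information header.
 */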
static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

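/*
 * Run the user-supplied network setup script with the tap interface name
 * as its only argument, and report failure if it exits non-zero.
 */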
static int virtio_net_exec_script(const char *script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = fork();
	if (pid == 0) {
		execl(script, script, tap_name, NULL);
		_exit(1);
	} else {
		waitpid(pid, &status, 0);
		if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
			pr_warning("Failed to set up tap device with %s", script);
			return -1;
		}
	}
	return 0;
}

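/*
 * Configure the tap device: set the vnet header size to match the
 * negotiated features, then either hand the interface to the setup
 * script or assign the host IP and bring the link up directly.
 */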
static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;

	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Failed to set tap device vnet header size (TUNSETVNETHDRSZ)");

	if (strcmp(params->script, "none")) {
		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
			goto fail;
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set ip address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

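/*
 * Obtain a tap file descriptor: either use the fd the user passed in,
 * open the macvtap character device directly, or create a new tap
 * interface through /dev/net/tun. Offloads are then negotiated with
 * TUNSETOFFLOAD, probing for UFO support along the way.
 */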
static bool virtio_net__tap_create(struct net_dev *ndev)
{
	int offload;
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');

	/* Did the user already give us the fd? */
	if (params->fd)
		ndev->tap_fd = params->fd;
	else {
		const char *tap_file = "/dev/net/tun";

		/* Did the user ask us to use macvtap? */
		if (macvtap)
			tap_file = params->tapif;

		ndev->tap_fd = open(tap_file, O_RDWR);
		if (ndev->tap_fd < 0) {
			pr_warning("Unable to open %s", tap_file);
			return false;
		}
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Failed to configure tap device. Are you root?");
		goto fail;
	}

	/*
	 * UFO support was removed from the kernel in commit
	 * fb652fdfe83710da0ca13448a41b7ed027d0a984
	 * (https://www.spinics.net/lists/netdev/msg443562.html).
	 * In order to keep supporting older kernels that predate that
	 * commit, request TUN_F_UFO by default and use the result of the
	 * ioctl to probe whether the kernel still supports UFO.
	 */
	ndev->tap_ufo = true;
	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		/*
		 * The failure may mean the kernel has dropped UFO support;
		 * retry TUNSETOFFLOAD without TUN_F_UFO.
		 */
		offload &= ~TUN_F_UFO;
		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
			pr_warning("Failed to configure tap device offloads (TUNSETOFFLOAD)");
			goto fail;
		}
		ndev->tap_ufo = false;
	}

	return true;

fail:
	/* Only close the fd if we opened it ourselves. */
	if (ndev->tap_fd >= 0 && !params->fd)
		close(ndev->tap_fd);

	return false;
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ((u8 *)(&ndev->config));
}

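/* Feature bits the host offers to the guest during negotiation. */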
static u32 get_host_features(struct kvm *kvm, void *dev)
{
	u32 features;
	struct net_dev *ndev = dev;

	/*
	 * With a single queue pair, the MQ term below degenerates to bit 0
	 * (VIRTIO_NET_F_CSUM), which is already set, so it is a no-op.
	 */
	features = 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);

	/*
	 * UFO can only be offered for host and guest when the kernel has
	 * TAP UFO support.
	 */
	if (ndev->tap_ufo)
		features |= (1UL << VIRTIO_NET_F_HOST_UFO
				| 1UL << VIRTIO_NET_F_GUEST_UFO);

	return features;
}

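/*
 * vhost-net negotiates its own feature set; enable mergeable RX buffers
 * only when both the guest and the vhost kernel side support them.
 */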
static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	u64 vhost_features;

	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
		die_perror("VHOST_GET_FEATURES failed");

	/* Make sure both sides support mergeable rx buffers. */
	if ((vhost_features & (1UL << VIRTIO_NET_F_MRG_RXBUF)) &&
			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}

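/*
 * Called once the guest has acked its feature set: fix up config-space
 * endianness and bring up the chosen backend (TAP or user-mode uip).
 */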
static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;
	struct virtio_net_config *conf = &ndev->config;

	ndev->features = features;

	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
							     conf->max_virtqueue_pairs);

	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("TAP device initialization failed");

		if (ndev->vhost_fd &&
				virtio_net__vhost_set_features(ndev) != 0)
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
						sizeof(struct virtio_net_hdr);
		uip_init(&ndev->info);
	}
}

static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

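/*
 * Map the guest-allocated vring into host memory. For userspace queues
 * this spawns the matching RX/TX/control thread; for vhost the ring
 * addresses are handed to the kernel instead.
 */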
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
		   u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	queue		= &ndev->vqs[vq];
	queue->pfn	= pfn;
	p		= virtio_get_vq(kvm, queue->pfn, page_size);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
	virtio_init_device_vq(&ndev->vdev, queue);

	mutex_init(&ndev->io_lock[vq]);
	pthread_cond_init(&ndev->io_cond[vq], NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
		else
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);

		return 0;
	}

	if (queue->endian != VIRTIO_ENDIAN_HOST)
		die_perror("VHOST requires the same endianness in guest and host");

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}

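/*
 * Wire a vhost queue to its guest interrupt: an eventfd is registered as
 * an irqfd for the GSI and handed to vhost as the "call" fd, and the tap
 * fd becomes the vhost-net backend.
 */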
static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file;
	int r;

	if (ndev->vhost_fd == 0)
		return;

	file = (struct vhost_vring_file) {
		.index	= vq,
		.fd	= eventfd(0, 0),
	};

	r = irq__add_irqfd(kvm, gsi, file.fd, -1);
	if (r < 0)
		die_perror("KVM_IRQFD failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_CALL failed");
	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r != 0)
		die("VHOST_NET_SET_BACKEND failed %d", errno);
}

static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index	= vq,
		.fd	= efd,
	};
	int r;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return ndev->vqs[vq].pfn;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static struct virtio_ops net_dev_virtio_ops = {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};

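/*
 * Open /dev/vhost-net and describe the guest memory layout to the kernel
 * so that vhost can translate guest-physical ring addresses.
 */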
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	struct kvm_mem_bank *bank;
	struct vhost_memory *mem;
	int r, i;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	i = 0;
	list_for_each_entry(bank, &kvm->mem_banks, list) {
		mem->regions[i] = (struct vhost_memory_region) {
			.guest_phys_addr = bank->guest_phys_addr,
			.memory_size	 = bank->size,
			.userspace_addr	 = (unsigned long)bank->host_addr,
		};
		i++;
	}
	mem->nregions = i;

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	ndev->vdev.use_vhost = true;

	free(mem);
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "downscript") == 0) {
		p->downscript = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

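/*
 * Parse one "--network" option: a comma-separated list of key=value
 * pairs, applied on top of the built-in defaults below.
 */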
int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.downscript	= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

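/*
 * Instantiate one virtio-net device from a parsed parameter set and
 * register it on the chosen transport (PCI or MMIO).
 */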
static int virtio_net__init_one(struct virtio_net_params *params)
{
	int i, err;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	ops = malloc(sizeof(*ops));
	if (ops == NULL) {
		err = -ENOMEM;
		goto err_free_ndev;
	}

	list_add_tail(&ndev->list, &ndevs);

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
	ndev->config.status = VIRTIO_NET_S_LINK_UP;
	if (ndev->queue_pairs > 1)
		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;

	for (i = 0; i < 6; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
		if (!virtio_net__tap_create(ndev))
			die_perror("You have requested a TAP device, but creation of one has failed because");
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;

err_free_ndev:
	free(ndev);
	return err;
}

int virtio_net__init(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		virtio_net__init_one(&kvm->cfg.net_params[i]);
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		virtio_net__init_one(&net_params);
	}

	return 0;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	struct virtio_net_params *params;
	struct net_dev *ndev;
	struct list_head *ptr;

	list_for_each(ptr, &ndevs) {
		ndev = list_entry(ptr, struct net_dev, list);
		params = ndev->params;
		/* Run the downscript to clean up any tap device attached to a bridge. */
		if (ndev->mode == NET_MODE_TAP &&
		    strcmp(params->downscript, "none"))
			virtio_net_exec_script(params->downscript, ndev->tap_name);
	}
	return 0;
}
virtio_dev_exit(virtio_net__exit);
948