/* xref: /kvmtool/virtio/net.c (revision 46aaf3b87d7ede27d5d6dd0e7da0574d9ab913b8) */
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/iovec.h"
#include "kvm/strbuf.h"

#include <linux/list.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#define VIRTIO_NET_QUEUE_SIZE		256
#define VIRTIO_NET_NUM_QUEUES		8

struct net_dev;

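/*
 * Backend operations. Two implementations exist below: tap_ops, which
 * moves packets through a TAP file descriptor, and uip_ops, which uses
 * the built-in user-mode (uip) network stack.
 */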
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
};

struct net_dev_queue {
	int				id;
	struct net_dev			*ndev;
	struct virt_queue		vq;
	pthread_t			thread;
	struct mutex			lock;
	pthread_cond_t			cond;
};

struct net_dev {
	struct mutex			mutex;
	struct virtio_device		vdev;
	struct list_head		list;

	struct net_dev_queue		queues[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	u32				queue_pairs;

	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];
	bool				tap_ufo;

	int				mode;

	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

#define MAX_PACKET_SIZE 65550

static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
{
	return ndev->vdev.features & (1UL << feature);
}

static int virtio_net_hdr_len(struct net_dev *ndev)
{
	if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ||
	    !ndev->vdev.legacy)
		return sizeof(struct virtio_net_hdr_mrg_rxbuf);

	return sizeof(struct virtio_net_hdr);
}

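/*
 * RX worker: reads one packet at a time from the backend into a bounce
 * buffer, then scatters it into as many guest RX descriptor chains as
 * needed, filling in num_buffers for mergeable RX buffers.
 */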
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len, copied;

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			u16 num_buffers;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread",
						__func__, queue->id, len);
				goto out_err;
			}

			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			/*
			 * The device MUST set num_buffers, except in the case
			 * where the legacy driver did not negotiate
			 * VIRTIO_NET_F_MRG_RXBUF and the field does not exist.
			 */
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ||
			    !ndev->vdev.legacy)
				hdr->num_buffers = virtio_host_to_guest_u16(vq->endian, num_buffers);

			virt_queue__used_idx_advance(vq, num_buffers);

			/* Interrupt the guest right away, or RX latency suffers. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

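/*
 * TX worker: pops descriptor chains off the TX queue and hands them to
 * the backend's tx op (writev() on the TAP fd, or uip_tx()).
 */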
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;

	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)",
						__func__, queue->id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm,
						struct net_dev *ndev,
						struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

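/*
 * Control queue worker: reads a virtio_net_ctrl_hdr from the
 * device-readable descriptors, dispatches on its class and writes the
 * one-byte ack into the device-writable descriptor that follows.
 */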
static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	u16 out, in, head;
	struct kvm *kvm = ndev->kvm;
	struct virtio_net_ctrl_hdr ctrl;
	virtio_net_ctrl_ack ack;
	size_t len;

	kvm__set_thread_name("virtio-net-ctrl");

	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			/* The header lives in the "out" (device-readable) iovecs... */
			len = min(iov_size(iov, out), sizeof(ctrl));
			memcpy_fromiovec((void *)&ctrl, iov, len);

			switch (ctrl.class) {
			case VIRTIO_NET_CTRL_MQ:
				ack = virtio_net_handle_mq(kvm, ndev, &ctrl);
				break;
			default:
				ack = VIRTIO_NET_ERR;
				break;
			}
			/* ...and the ack goes into the "in" iovecs that follow them. */
			memcpy_toiovec(iov + out, &ack, sizeof(ack));
			virt_queue__set_used_elem(vq, head, sizeof(ack));
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
	}

	pthread_exit(NULL);
	return NULL;
}

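/*
 * vq kick handler: wakes up the worker thread servicing the given
 * queue. Runs in the notification context, so it only signals.
 */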
static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	struct net_dev_queue *net_queue;

	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	net_queue = &ndev->queues[queue];
	mutex_lock(&net_queue->lock);
	pthread_cond_signal(&net_queue->cond);
	mutex_unlock(&net_queue->lock);
}

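/*
 * Attaches tap_fd to a TAP interface via TUNSETIFF (IFF_VNET_HDR gives
 * us virtio-net headers on the fd) and records the interface name the
 * kernel actually assigned.
 */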
static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strlcpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strlcpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

static int virtio_net_exec_script(const char *script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = vfork();
	if (pid == 0) {
		execl(script, script, tap_name, NULL);
		_exit(1);
	} else {
		waitpid(pid, &status, 0);
		if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
			pr_warning("Failed to set up tap with script %s", script);
			return -1;
		}
	}
	return 0;
}

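/*
 * Configures the TAP interface at device start: sets the vnet header
 * size, then either runs the user's setup script or assigns the host
 * IP address and brings the interface up directly.
 */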
static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;

	hdr_len = virtio_net_hdr_len(ndev);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Failed to set tap device vnet header size (TUNSETVNETHDRSZ)");

	if (strcmp(params->script, "none")) {
		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
			goto fail;
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set IP address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

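/*
 * Undoes tap_init at device stop: brings the interface back down.
 * Skipped when the user supplied the tap interface, since we did not
 * configure it ourselves.
 */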
static void virtio_net__tap_exit(struct net_dev *ndev)
{
	int sock;
	struct ifreq ifr;

	if (ndev->params->tapif)
		return;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		return;
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
	ioctl(sock, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		pr_warning("Could not bring tap device down");
	close(sock);
}

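/*
 * Obtains the TAP file descriptor at device creation: reuses a
 * user-supplied fd, opens a macvtap node, or opens /dev/net/tun and
 * requests a TAP interface. Also probes the kernel's UFO offload
 * support (see the comment below).
 */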
static bool virtio_net__tap_create(struct net_dev *ndev)
{
	int offload;
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');

	/* Did the user already give us an fd? */
	if (params->fd)
		ndev->tap_fd = params->fd;
	else {
		const char *tap_file = "/dev/net/tun";

		/* Did the user ask us to use macvtap? */
		if (macvtap)
			tap_file = params->tapif;

		ndev->tap_fd = open(tap_file, O_RDWR);
		if (ndev->tap_fd < 0) {
			pr_warning("Unable to open %s", tap_file);
			return false;
		}
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Failed to configure tap device. Are you root?");
		goto fail;
	}

	/*
	 * UFO support was removed from the kernel in commit
	 * fb652fdfe83710da0ca13448a41b7ed027d0a984
	 * (https://www.spinics.net/lists/netdev/msg443562.html).
	 * In order to support older kernels that predate this commit,
	 * we include TUN_F_UFO in the offload flags by default and use
	 * the result to probe the kernel's UFO support.
	 */
	ndev->tap_ufo = true;
	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		/*
		 * The failure may mean the kernel has removed UFO
		 * support. Retry TUNSETOFFLOAD without TUN_F_UFO.
		 */
		offload &= ~TUN_F_UFO;
		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
			pr_warning("Failed to set tap device offloads (TUNSETOFFLOAD)");
			goto fail;
		}
		ndev->tap_ufo = false;
	}

	return true;

fail:
	/* Only close the fd if we opened it ourselves. */
	if (ndev->tap_fd >= 0 && !params->fd)
		close(ndev->tap_fd);

	return false;
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return (u8 *)&ndev->config;
}

static size_t get_config_size(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return sizeof(ndev->config);
}

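/*
 * Advertises the feature set we can offer the guest. When vhost is in
 * use, the offer is restricted to what the vhost-net kernel module
 * supports as well.
 */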
static u64 get_host_features(struct kvm *kvm, void *dev)
{
	u64 features;
	struct net_dev *ndev = dev;

	features = 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| 1UL << VIRTIO_F_ANY_LAYOUT;

	if (ndev->queue_pairs > 1)
		features |= 1UL << VIRTIO_NET_F_MQ;

	/*
	 * Host and guest UFO can only be offered when the kernel's TAP
	 * driver supports UFO.
	 */
	if (ndev->tap_ufo)
		features |= (1UL << VIRTIO_NET_F_HOST_UFO
				| 1UL << VIRTIO_NET_F_GUEST_UFO);

	if (ndev->vhost_fd) {
		u64 vhost_features;

		if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
			die_perror("VHOST_GET_FEATURES failed");

		features &= vhost_features;
	}

	return features;
}

static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	/* VHOST_NET_F_VIRTIO_NET_HDR clashes with VIRTIO_F_ANY_LAYOUT! */
	u64 features = ndev->vdev.features &
			~(1UL << VHOST_NET_F_VIRTIO_NET_HDR);

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}

static void virtio_net_start(struct net_dev *ndev)
{
	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("TAP device initialization failed because");

		if (ndev->vhost_fd &&
				virtio_net__vhost_set_features(ndev) != 0)
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = virtio_net_hdr_len(ndev);
		uip_init(&ndev->info);
	}
}

static void virtio_net_stop(struct net_dev *ndev)
{
	/* Undo whatever start() did */
	if (ndev->mode == NET_MODE_TAP)
		virtio_net__tap_exit(ndev);
	else
		uip_exit(&ndev->info);
}

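/*
 * Rewrites the config space fields in guest byte order and, for
 * cross-endian guests, tells the TAP driver which endianness to use
 * for vnet headers (TUNSETVNETLE/TUNSETVNETBE).
 */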
static void virtio_net_update_endian(struct net_dev *ndev)
{
	struct virtio_net_config *conf = &ndev->config;

	conf->status = virtio_host_to_guest_u16(ndev->vdev.endian,
						VIRTIO_NET_S_LINK_UP);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(ndev->vdev.endian,
							     ndev->queue_pairs);

	/* Let TAP know about vnet header endianness */
	if (ndev->mode == NET_MODE_TAP &&
	    ndev->vdev.endian != VIRTIO_ENDIAN_HOST) {
		int enable_val = 1, disable_val = 0;
		int enable_req, disable_req;

		if (ndev->vdev.endian == VIRTIO_ENDIAN_LE) {
			enable_req = TUNSETVNETLE;
			disable_req = TUNSETVNETBE;
		} else {
			enable_req = TUNSETVNETBE;
			disable_req = TUNSETVNETLE;
		}

		ioctl(ndev->tap_fd, disable_req, &disable_val);
		if (ioctl(ndev->tap_fd, enable_req, &enable_val) < 0)
			pr_err("Failed to set tap device vnet header endianness (TUNSETVNETLE/BE)");
	}
}

static void notify_status(struct kvm *kvm, void *dev, u32 status)
{
	struct net_dev *ndev = dev;

	if (status & VIRTIO__STATUS_CONFIG)
		virtio_net_update_endian(ndev);

	if (status & VIRTIO__STATUS_START)
		virtio_net_start(dev);
	else if (status & VIRTIO__STATUS_STOP)
		virtio_net_stop(dev);
}

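/*
 * Queue layout: queue pair i uses virtqueue 2*i for RX and 2*i+1 for
 * TX; the final virtqueue (queue_pairs * 2) is the control queue.
 */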
static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

static int init_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct vhost_vring_file file = { .index = vq };
	struct net_dev_queue *net_queue;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	int r;

	compat__remove_message(compat_id);

	net_queue	= &ndev->queues[vq];
	net_queue->id	= vq;
	net_queue->ndev	= ndev;
	queue		= &net_queue->vq;
	virtio_init_device_vq(kvm, &ndev->vdev, queue, VIRTIO_NET_QUEUE_SIZE);

	mutex_init(&net_queue->lock);
	pthread_cond_init(&net_queue->cond, NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread,
			       net_queue);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&net_queue->thread, NULL,
				       virtio_net_tx_thread, net_queue);
		else
			pthread_create(&net_queue->thread, NULL,
				       virtio_net_rx_thread, net_queue);

		return 0;
	}

	virtio_vhost_set_vring(kvm, ndev->vhost_fd, vq, queue);

	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r < 0)
		die_perror("VHOST_NET_SET_BACKEND failed");

	return 0;
}

static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;
	struct net_dev_queue *queue = &ndev->queues[vq];

	virtio_vhost_reset_vring(kvm, ndev->vhost_fd, vq, &queue->vq);

	/*
	 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but
	 * we can't restart it at the moment.
	 */
	if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) {
		pr_warning("Cannot reset VHOST queue");
		ioctl(ndev->vhost_fd, VHOST_RESET_OWNER);
		return;
	}

	/*
	 * Threads are waiting on cancellation points (readv or
	 * pthread_cond_wait) and should stop gracefully.
	 */
	pthread_cancel(queue->thread);
	pthread_join(queue->thread, NULL);
}

static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct net_dev_queue *queue = &ndev->queues[vq];

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	virtio_vhost_set_vring_irqfd(kvm, gsi, &queue->vq);
}

static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	virtio_vhost_set_vring_kick(kvm, ndev->vhost_fd, vq, efd);
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return &ndev->queues[vq].vq;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static unsigned int get_vq_count(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ndev->queue_pairs * 2 + 1;
}

static struct virtio_ops net_dev_virtio_ops = {
	.get_config		= get_config,
	.get_config_size	= get_config_size,
	.get_host_features	= get_host_features,
	.get_vq_count		= get_vq_count,
	.init_vq		= init_vq,
	.exit_vq		= exit_vq,
	.get_vq			= get_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
	.notify_status		= notify_status,
};

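/*
 * Opens /dev/vhost-net so the TX/RX datapath can run in the kernel
 * instead of the userspace threads above. Multiqueue is not wired up
 * for vhost here, so vhost is skipped for multiqueue devices.
 */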
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	if (ndev->queue_pairs > 1) {
		pr_warning("multiqueue is not supported with vhost yet");
		return;
	}

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	virtio_vhost_init(kvm, ndev->vhost_fd);

	ndev->vdev.use_vhost = true;
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", val);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "downscript") == 0) {
		p->downscript = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

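/*
 * Parses one "--network"/"-n" option value: a comma-separated list of
 * key=value pairs matching the parameters handled above, for example
 * (illustrative invocation):
 *
 *	lkvm run -n mode=tap,tapif=tap0,script=none,vhost=1
 *
 * Keys that are not given keep the defaults set below.
 */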
int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.downscript	= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

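/*
 * Creates a single virtio-net device from one set of parameters:
 * picks the TAP or user-mode backend, the virtio transport (PCI or
 * MMIO), and optionally attaches vhost-net.
 */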
static int virtio_net__init_one(struct virtio_net_params *params)
{
	enum virtio_trans trans = params->kvm->cfg.virtio_transport;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	int i, r;

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	list_add_tail(&ndev->list, &ndevs);

	ops = malloc(sizeof(*ops));
	if (ops == NULL)
		return -ENOMEM;

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));

	for (i = 0; i < 6; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
		if (!virtio_net__tap_create(ndev))
			die_perror("You have requested a TAP device, but creation of one has failed because");
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	r = virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
			PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
	if (r < 0) {
		free(ops);
		return r;
	}

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;
}

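/*
 * Device-wide init: instantiates one device per "--network" option, or
 * falls back to a single user-mode device when none was given and
 * networking was not explicitly disabled.
 */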
int virtio_net__init(struct kvm *kvm)
{
	int i, r;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		r = virtio_net__init_one(&kvm->cfg.net_params[i]);
		if (r < 0)
			goto cleanup;
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		r = virtio_net__init_one(&net_params);
		if (r < 0)
			goto cleanup;
	}

	return 0;

cleanup:
	virtio_net__exit(kvm);
	return r;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	struct virtio_net_params *params;
	struct net_dev *ndev;
	struct list_head *ptr, *n;

	list_for_each_safe(ptr, n, &ndevs) {
		ndev = list_entry(ptr, struct net_dev, list);
		params = ndev->params;
		/* Clean up any tap device that was attached to a bridge */
		if (ndev->mode == NET_MODE_TAP &&
		    strcmp(params->downscript, "none"))
			virtio_net_exec_script(params->downscript, ndev->tap_name);

		list_del(&ndev->list);
		free(ndev);
	}
	return 0;
}
virtio_dev_exit(virtio_net__exit);