xref: /kvmtool/virtio/net.c (revision d2a7ddff4626b638945fd141891379a553fd0d88)
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/types.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/iovec.h"

#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>

#define VIRTIO_NET_QUEUE_SIZE		256
#define VIRTIO_NET_NUM_QUEUES		8

struct net_dev;

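/*
 * Backend I/O hooks: one implementation is backed by a tap device, the
 * other by the built-in user-mode (uip) network stack.
 */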
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
};

struct net_dev {
	struct mutex			mutex;
	struct virtio_device		vdev;
	struct list_head		list;

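	/*
	 * Virtqueues come in RX/TX pairs (even index = RX, odd = TX),
	 * with one extra slot at the end for the control queue.
	 */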
	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	u32				features, rx_vqs, tx_vqs, queue_pairs;

	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];

	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];

	int				mode;

	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

#define MAX_PACKET_SIZE 65550

static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
{
	return ndev->features & (1 << feature);
}

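/*
 * Convert the multi-byte virtio-net header fields between guest and host
 * byte order, so the host side can use them natively.
 */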
static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
{
	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
}

static void virtio_net_fix_rx_hdr(struct virtio_net_hdr_mrg_rxbuf *hdr, struct net_dev *ndev)
{
	hdr->hdr.hdr_len	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.hdr_len);
	hdr->hdr.gso_size	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.gso_size);
	hdr->hdr.csum_start	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_start);
	hdr->hdr.csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_offset);
	if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		hdr->num_buffers	= virtio_host_to_guest_u16(&ndev->vdev, hdr->num_buffers);
}

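/*
 * RX thread: sleeps until the guest posts receive buffers, then reads one
 * packet at a time from the backend into a bounce buffer and copies it into
 * the guest's descriptors, spanning several buffer chains when
 * VIRTIO_NET_F_MRG_RXBUF was negotiated.
 */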
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len, copied;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->rx_vqs++ * 2;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			int i;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
						__func__, id, len);
				goto out_err;
			}

			copied = i = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				if (i++ == 0)
					virtio_net_fix_rx_hdr(hdr, ndev);
				if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) {
					u16 num_buffers = virtio_guest_to_host_u16(vq, hdr->num_buffers);
					hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers + 1);
				}
				virt_queue__set_used_elem(vq, head, iovsize);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}
			/* We should interrupt the guest right away, otherwise latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

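/*
 * TX thread: drains buffers the guest queued for transmission, fixes up the
 * virtio-net header endianness and hands the iovec to the backend.
 */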
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->tx_vqs++ * 2 + 1;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			struct virtio_net_hdr *hdr;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			virtio_net_fix_tx_hdr(hdr, ndev);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)\n",
						__func__, id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

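/*
 * Control-queue thread: services VIRTIO_NET_CTRL_* requests; currently only
 * the multiqueue (MQ) class is acknowledged, everything else is rejected.
 */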
static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	u16 out, in, head;
	struct net_dev *ndev = p;
	struct kvm *kvm = ndev->kvm;
	u32 id = ndev->queue_pairs * 2;
	struct virt_queue *vq = &ndev->vqs[id];
	struct virtio_net_ctrl_hdr *ctrl;
	virtio_net_ctrl_ack *ack;

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			ctrl = iov[0].iov_base;
			ack = iov[out].iov_base;

			switch (ctrl->class) {
			case VIRTIO_NET_CTRL_MQ:
				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
				break;
			default:
				*ack = VIRTIO_NET_ERR;
				break;
			}
			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

	pthread_exit(NULL);

	return NULL;
}

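/* Wake up the I/O thread servicing the virtqueue the guest just kicked. */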
static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	mutex_lock(&ndev->io_lock[queue]);
	pthread_cond_signal(&ndev->io_cond[queue]);
	mutex_unlock(&ndev->io_lock[queue]);
}

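/* Attach the tap fd to a (possibly user-named) TAP interface. */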
static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

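/*
 * Set up the tap backend: open the tap (or macvtap) device, configure the
 * vnet header size and offloads, then either run the user-supplied script or
 * assign the host IP and bring the interface up.
 */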
static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;
	bool macvtap = skipconf && (params->tapif[0] == '/');
	const char *tap_file = "/dev/net/tun";

	/* Did the user already give us an FD? */
	if (params->fd) {
		ndev->tap_fd = params->fd;
		return true;
	}

	if (macvtap)
		tap_file = params->tapif;

	ndev->tap_fd = open(tap_file, O_RDWR);
	if (ndev->tap_fd < 0) {
		pr_warning("Unable to open %s", tap_file);
		goto fail;
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Config tap device error. Are you root?");
		goto fail;
	}

	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Config tap device TUNSETVNETHDRSZ error");

	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		pr_warning("Config tap device TUNSETOFFLOAD error");
		goto fail;
	}

	if (strcmp(params->script, "none")) {
		pid = fork();
		if (pid == 0) {
			execl(params->script, params->script, ndev->tap_name, NULL);
			_exit(1);
		} else {
			waitpid(pid, &status, 0);
			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
				pr_warning("Failed to set up tap with %s", params->script);
				goto fail;
			}
		}
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set ip address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ((u8 *)(&ndev->config));
}

static u32 get_host_features(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_UFO
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_UFO
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
}

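/*
 * Negotiate features with the vhost-net kernel backend; mergeable RX buffers
 * are only enabled when both the guest and vhost support them.
 */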
static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	u64 vhost_features;

	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
		die_perror("VHOST_GET_FEATURES failed");

	/* Make sure both sides support mergeable rx buffers. */
	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}

static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;
	struct virtio_net_config *conf = &ndev->config;

	ndev->features = features;

	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
							     conf->max_virtqueue_pairs);

	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("You have requested a TAP device, but creation of one has failed because");
		if (ndev->vhost_fd &&
				virtio_net__vhost_set_features(ndev) != 0)
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
						sizeof(struct virtio_net_hdr);
		uip_init(&ndev->info);
	}
}

static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

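/*
 * Initialize one virtqueue and start whatever will service it: the control
 * thread for the control queue, per-queue RX/TX threads when running in
 * userspace, or the vhost-net vring setup when vhost is enabled.
 */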
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
		   u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	queue		= &ndev->vqs[vq];
	queue->pfn	= pfn;
	p		= virtio_get_vq(kvm, queue->pfn, page_size);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
	virtio_init_device_vq(&ndev->vdev, queue);

	mutex_init(&ndev->io_lock[vq]);
	pthread_cond_init(&ndev->io_cond[vq], NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
		else
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);

		return 0;
	}

	if (queue->endian != VIRTIO_ENDIAN_HOST)
		die_perror("VHOST requires VIRTIO_ENDIAN_HOST");

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}

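/*
 * With vhost, bypass userspace entirely: route the vring's call eventfd to
 * the guest's GSI via KVM_IRQFD and attach the tap fd as the backend.
 */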
static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct kvm_irqfd irq;
	struct vhost_vring_file file;
	int r;

	if (ndev->vhost_fd == 0)
		return;

	irq = (struct kvm_irqfd) {
		.gsi	= gsi,
		.fd	= eventfd(0, 0),
	};
	file = (struct vhost_vring_file) {
		.index	= vq,
		.fd	= irq.fd,
	};

	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
	if (r < 0)
		die_perror("KVM_IRQFD failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_CALL failed");
	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r != 0)
		die("VHOST_NET_SET_BACKEND failed %d", errno);
}

static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index	= vq,
		.fd	= efd,
	};
	int r;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return ndev->vqs[vq].pfn;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};

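/*
 * Open /dev/vhost-net, become its owner and describe guest RAM to it as a
 * single memory region so the kernel can translate guest addresses.
 */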
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	struct vhost_memory *mem;
	int r;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	mem->nregions = 1;
	mem->regions[0] = (struct vhost_memory_region) {
		.guest_phys_addr	= 0,
		.memory_size		= kvm->ram_size,
		.userspace_addr		= (unsigned long)kvm->ram_start,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	ndev->vdev.use_vhost = true;

	free(mem);
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

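/*
 * Parse a comma-separated key=value option string (the --network argument)
 * into a struct virtio_net_params and append it to kvm->cfg.net_params.
 */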
int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

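/*
 * Create one virtio-net device: pick the backend (tap or uip), choose the
 * transport (PCI or MMIO) and register it, optionally with vhost offload.
 */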
static int virtio_net__init_one(struct virtio_net_params *params)
{
	int i, err;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	ops = malloc(sizeof(*ops));
	if (ops == NULL) {
		err = -ENOMEM;
		goto err_free_ndev;
	}

	list_add_tail(&ndev->list, &ndevs);

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
	ndev->config.status = VIRTIO_NET_S_LINK_UP;
	if (ndev->queue_pairs > 1)
		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;

	for (i = 0; i < 6; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;

err_free_ndev:
	free(ndev);
	return err;
}

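/*
 * Instantiate every configured network device; if none were requested (and
 * networking wasn't disabled), fall back to a single user-mode device.
 */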
int virtio_net__init(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		virtio_net__init_one(&kvm->cfg.net_params[i]);
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		virtio_net__init_one(&net_params);
	}

	return 0;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	return 0;
}
virtio_dev_exit(virtio_net__exit);