xref: /kvmtool/virtio/net.c (revision a28574790c2eb00cb0c588167dca1b00fa414062)
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/types.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/virtio-trans.h"

#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>

#define VIRTIO_NET_QUEUE_SIZE		128
#define VIRTIO_NET_NUM_QUEUES		2
#define VIRTIO_NET_RX_QUEUE		0
#define VIRTIO_NET_TX_QUEUE		1

struct net_dev;

extern struct kvm *kvm;

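/*
 * Backend operations for moving packets between the guest's virtio rings
 * and the host. Two backends are defined below: tap_ops, which reads and
 * writes a host tap device directly, and uip_ops, which pushes frames
 * through the built-in user-mode TCP/IP stack.
 */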
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
};

struct net_dev {
	pthread_mutex_t			mutex;
	struct virtio_trans		vtrans;
	struct list_head		list;

	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
	struct virtio_net_config	config;
	u32				features;

	pthread_t			io_rx_thread;
	pthread_mutex_t			io_rx_lock;
	pthread_cond_t			io_rx_cond;

	pthread_t			io_tx_thread;
	pthread_mutex_t			io_tx_lock;
	pthread_cond_t			io_tx_cond;

	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];

	int				mode;

	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

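/*
 * RX worker: sleeps until the guest posts receive buffers, then fills each
 * posted descriptor chain with one incoming frame from the backend and
 * marks it used. The guest is signalled after every filled buffer (subject
 * to EVENT_IDX suppression in virtio_queue__should_signal()) to keep
 * receive latency low.
 */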
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;

	kvm	= ndev->kvm;
	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];

	while (1) {
		mutex_lock(&ndev->io_rx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
		mutex_unlock(&ndev->io_rx_lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			len = ndev->ops->rx(iov, in, ndev);
			virt_queue__set_used_elem(vq, head, len);

			/* Interrupt the guest right away, otherwise latency is huge. */
			if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE]))
				ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans,
								VIRTIO_NET_RX_QUEUE);
		}
	}

	pthread_exit(NULL);
	return NULL;
}

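/*
 * TX worker: sleeps until the guest kicks the TX queue, then drains all
 * pending descriptor chains through the backend. Unlike RX, the guest is
 * signalled once per drained batch rather than once per packet.
 */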
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;

	kvm	= ndev->kvm;
	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];

	while (1) {
		mutex_lock(&ndev->io_tx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
		mutex_unlock(&ndev->io_tx_lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			len = ndev->ops->tx(iov, out, ndev);
			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE]))
			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE);
	}

	pthread_exit(NULL);
	return NULL;
}

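/*
 * Queue notification ("kick") handler for the non-vhost path: wake up the
 * I/O worker thread that services the kicked queue.
 */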
static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	switch (queue) {
	case VIRTIO_NET_TX_QUEUE:
		mutex_lock(&ndev->io_tx_lock);
		pthread_cond_signal(&ndev->io_tx_cond);
		mutex_unlock(&ndev->io_tx_lock);
		break;
	case VIRTIO_NET_RX_QUEUE:
		mutex_lock(&ndev->io_rx_lock);
		pthread_cond_signal(&ndev->io_rx_cond);
		mutex_unlock(&ndev->io_rx_lock);
		break;
	default:
		pr_warning("Unknown queue index %d", queue);
	}
}

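/*
 * Set up the host tap device: open /dev/net/tun, create a tap interface
 * that carries a virtio-net header in front of each frame (IFF_VNET_HDR),
 * enable checksum/TSO/UFO offloads, and then either run a user-supplied
 * setup script or assign the host IP and bring the interface up by hand.
 */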
static bool virtio_net__tap_init(const struct virtio_net_params *params,
					struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;

	/* Did the user already give us a tap FD? */
	if (params->fd) {
		ndev->tap_fd = params->fd;
		return true;
	}

	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
	if (ndev->tap_fd < 0) {
		pr_warning("Unable to open /dev/net/tun");
		goto fail;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
		pr_warning("Failed to configure tap device. Are you root?");
		goto fail;
	}

	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));

	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
		pr_warning("TUNSETNOCSUM failed on tap device");
		goto fail;
	}

	hdr_len = sizeof(struct virtio_net_hdr);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("TUNSETVNETHDRSZ failed on tap device");

	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		pr_warning("TUNSETOFFLOAD failed on tap device");
		goto fail;
	}

	if (strcmp(params->script, "none")) {
		pid = fork();
		if (pid < 0)
			goto fail;
		if (pid == 0) {
			execl(params->script, params->script, ndev->tap_name, NULL);
			_exit(1);
		}
		waitpid(pid, &status, 0);
		if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
			pr_warning("Failed to set up tap device with %s", params->script);
			goto fail;
		}
	} else {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set IP address on tap device");
			goto fail;
		}
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
	ioctl(sock, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		pr_warning("Could not bring tap device up");

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

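/* Spawn the RX and TX worker threads; used only when vhost-net is not active. */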
static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
{
	pthread_mutex_init(&ndev->io_tx_lock, NULL);
	pthread_mutex_init(&ndev->io_rx_lock, NULL);

	pthread_cond_init(&ndev->io_tx_cond, NULL);
	pthread_cond_init(&ndev->io_rx_cond, NULL);

	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
}

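/*
 * Thin backend wrappers: the tap variants pass the guest's scatter-gather
 * list straight to readv()/writev() on the tap fd, while the uip variants
 * hand it to the user-mode network stack.
 */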
static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

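/*
 * Byte-wide accessors for the virtio-net config space (MAC address and
 * link status), followed by host feature advertisement and guest feature
 * acknowledgement.
 */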
static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
{
	struct net_dev *ndev = dev;

	((u8 *)(&ndev->config))[offset] = data;
}

static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
{
	struct net_dev *ndev = dev;

	return ((u8 *)(&ndev->config))[offset];
}

static u32 get_host_features(struct kvm *kvm, void *dev)
{
	return 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_UFO
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_UFO
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC;
}

static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;

	ndev->features = features;
}

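/*
 * The guest wrote the queue's page frame number: translate it to a host
 * virtual address and initialize our view of the vring. When vhost-net is
 * active, also register the ring size, base index, and the desc/avail/used
 * addresses with the kernel so it can service the queue directly.
 */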
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	queue		= &ndev->vqs[vq];
	queue->pfn	= pfn;
	p		= guest_pfn_to_host(kvm, queue->pfn);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);

	if (ndev->vhost_fd == 0)
		return 0;

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}

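/*
 * Wire vhost completion interrupts straight into the guest: a fresh
 * eventfd is registered both as a KVM irqfd for this GSI and as the
 * vring's "call" fd, so the vhost kernel thread can inject the interrupt
 * without bouncing through userspace. The tap fd is then attached as the
 * vhost-net backend for this queue.
 */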
static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct kvm_irqfd irq;
	struct vhost_vring_file file;
	int r;

	if (ndev->vhost_fd == 0)
		return;

	irq = (struct kvm_irqfd) {
		.gsi	= gsi,
		.fd	= eventfd(0, 0),
	};
	file = (struct vhost_vring_file) {
		.index	= vq,
		.fd	= irq.fd,
	};

	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
	if (r < 0)
		die_perror("KVM_IRQFD failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_CALL failed");

	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r != 0)
		die("VHOST_NET_SET_BACKEND failed %d", errno);
}

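/*
 * Mirror image of the "call" fd above: the ioeventfd the guest kicks is
 * handed to vhost as the vring's "kick" fd, so queue notifications also
 * bypass userspace.
 */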
static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index	= vq,
		.fd	= efd,
	};
	int r;

	if (ndev->vhost_fd == 0)
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return ndev->vqs[vq].pfn;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	return VIRTIO_NET_QUEUE_SIZE;
}

static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
	.set_config		= set_config,
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.notify_vq		= notify_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};

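/*
 * Hand the datapath to the in-kernel vhost-net driver: take ownership of
 * a /dev/vhost-net instance, set its features, and describe guest RAM
 * with a single-region memory table so vhost can translate guest physical
 * addresses to host virtual addresses.
 */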
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	struct vhost_memory *mem;
	int r;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	mem->nregions = 1;
	mem->regions[0] = (struct vhost_memory_region) {
		.guest_phys_addr	= 0,
		.memory_size		= kvm->ram_size,
		.userspace_addr		= (unsigned long)kvm->ram_start,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
	if (r != 0)
		die_perror("VHOST_SET_FEATURES failed");
	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	free(mem);
}

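/*
 * Device bring-up: allocate the device, pick the tap or uip backend from
 * the command-line parameters, register the device on the virtio PCI
 * transport, and start either vhost-net or the userspace I/O threads.
 */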
void virtio_net__init(const struct virtio_net_params *params)
{
	int i;
	struct net_dev *ndev;

	if (!params)
		return;

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		die("Failed allocating ndev");

	list_add_tail(&ndev->list, &ndevs);

	ndev->kvm = params->kvm;

	mutex_init(&ndev->mutex);
	ndev->config.status = VIRTIO_NET_S_LINK_UP;

	for (i = 0; i < 6; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(params, ndev))
			die_perror("You have requested a TAP device, "
					"but creating one failed because:");
		ndev->ops = &tap_ops;
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		uip_init(&ndev->info);
		ndev->ops = &uip_ops;
	}

	virtio_trans_init(&ndev->vtrans, VIRTIO_PCI);
	ndev->vtrans.trans_ops->init(kvm, &ndev->vtrans, ndev, PCI_DEVICE_ID_VIRTIO_NET,
					VIRTIO_ID_NET, PCI_CLASS_NET);
	ndev->vtrans.virtio_ops = &net_dev_virtio_ops;

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);
	else
		virtio_net__io_thread_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = compat__add_message("virtio-net device was not detected",
						"While you have requested a virtio-net device, "
						"the guest kernel did not initialize it.\n"
						"Please make sure that the guest kernel was "
						"compiled with CONFIG_VIRTIO_NET=y "
						"in its .config");
}
548