xref: /kvmtool/virtio/net.c (revision 9ed67cdc82dd02ef70ec254b0a347c389ed392fe)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/types.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
7 #include "kvm/kvm.h"
8 #include "kvm/irq.h"
9 #include "kvm/uip.h"
10 #include "kvm/guest_compat.h"
11 #include "kvm/virtio-trans.h"
12 
13 #include <linux/vhost.h>
14 #include <linux/virtio_net.h>
15 #include <linux/if_tun.h>
16 #include <linux/types.h>
17 
18 #include <arpa/inet.h>
19 #include <net/if.h>
20 
21 #include <unistd.h>
22 #include <assert.h>
23 #include <fcntl.h>
24 
25 #include <sys/socket.h>
26 #include <sys/ioctl.h>
27 #include <sys/types.h>
28 #include <sys/wait.h>
29 #include <sys/eventfd.h>
30 
31 #define VIRTIO_NET_QUEUE_SIZE		128
32 #define VIRTIO_NET_NUM_QUEUES		2
33 #define VIRTIO_NET_RX_QUEUE		0
34 #define VIRTIO_NET_TX_QUEUE		1
35 
36 struct net_dev;
37 
38 extern struct kvm *kvm;
39 
40 struct net_dev_operations {
41 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
42 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
43 };
44 
45 struct net_dev {
46 	pthread_mutex_t			mutex;
47 	struct virtio_trans		vtrans;
48 	struct list_head		list;
49 
50 	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
51 	struct virtio_net_config	config;
52 	u32				features;
53 
54 	pthread_t			io_rx_thread;
55 	pthread_mutex_t			io_rx_lock;
56 	pthread_cond_t			io_rx_cond;
57 
58 	pthread_t			io_tx_thread;
59 	pthread_mutex_t			io_tx_lock;
60 	pthread_cond_t			io_tx_cond;
61 
62 	int				vhost_fd;
63 	int				tap_fd;
64 	char				tap_name[IFNAMSIZ];
65 
66 	int				mode;
67 
68 	struct uip_info			info;
69 	struct net_dev_operations	*ops;
70 	struct kvm			*kvm;
71 };
72 
73 static LIST_HEAD(ndevs);
74 static int compat_id = -1;
75 
76 static void *virtio_net_rx_thread(void *p)
77 {
78 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
79 	struct virt_queue *vq;
80 	struct kvm *kvm;
81 	struct net_dev *ndev = p;
82 	u16 out, in;
83 	u16 head;
84 	int len;
85 
86 	kvm	= ndev->kvm;
87 	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
88 
89 	while (1) {
90 
91 		mutex_lock(&ndev->io_rx_lock);
92 		if (!virt_queue__available(vq))
93 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
94 		mutex_unlock(&ndev->io_rx_lock);
95 
96 		while (virt_queue__available(vq)) {
97 
98 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
99 
100 			len = ndev->ops->rx(iov, in, ndev);
101 
102 			virt_queue__set_used_elem(vq, head, len);
103 
104 			/* We should interrupt guest right now, otherwise latency is huge. */
105 			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_RX_QUEUE);
106 		}
107 
108 	}
109 
110 	pthread_exit(NULL);
111 	return NULL;
112 
113 }
114 
115 static void *virtio_net_tx_thread(void *p)
116 {
117 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
118 	struct virt_queue *vq;
119 	struct kvm *kvm;
120 	struct net_dev *ndev = p;
121 	u16 out, in;
122 	u16 head;
123 	int len;
124 
125 	kvm	= ndev->kvm;
126 	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
127 
128 	while (1) {
129 		mutex_lock(&ndev->io_tx_lock);
130 		if (!virt_queue__available(vq))
131 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
132 		mutex_unlock(&ndev->io_tx_lock);
133 
134 		while (virt_queue__available(vq)) {
135 
136 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
137 
138 			len = ndev->ops->tx(iov, out, ndev);
139 
140 			virt_queue__set_used_elem(vq, head, len);
141 		}
142 
143 		ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE);
144 	}
145 
146 	pthread_exit(NULL);
147 
148 	return NULL;
149 
150 }
151 
152 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
153 {
154 	switch (queue) {
155 	case VIRTIO_NET_TX_QUEUE:
156 		mutex_lock(&ndev->io_tx_lock);
157 		pthread_cond_signal(&ndev->io_tx_cond);
158 		mutex_unlock(&ndev->io_tx_lock);
159 		break;
160 	case VIRTIO_NET_RX_QUEUE:
161 		mutex_lock(&ndev->io_rx_lock);
162 		pthread_cond_signal(&ndev->io_rx_cond);
163 		mutex_unlock(&ndev->io_rx_lock);
164 		break;
165 	default:
166 		pr_warning("Unknown queue index %u", queue);
167 	}
168 }
169 
170 static bool virtio_net__tap_init(const struct virtio_net_params *params,
171 					struct net_dev *ndev)
172 {
173 	int sock = socket(AF_INET, SOCK_STREAM, 0);
174 	int pid, status, offload, hdr_len;
175 	struct sockaddr_in sin = {0};
176 	struct ifreq ifr;
177 
178 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
179 	if (ndev->tap_fd < 0) {
180 		pr_warning("Unable to open /dev/net/tun");
181 		goto fail;
182 	}
183 
184 	memset(&ifr, 0, sizeof(ifr));
185 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
186 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
187 		pr_warning("Config tap device error. Are you root?");
188 		goto fail;
189 	}
190 
191 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
192 
193 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
194 		pr_warning("Config tap device TUNSETNOCSUM error");
195 		goto fail;
196 	}
197 
198 	hdr_len = sizeof(struct virtio_net_hdr);
199 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
200 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
201 
202 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
203 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
204 		pr_warning("Config tap device TUNSETOFFLOAD error");
205 		goto fail;
206 	}
207 
208 	if (strcmp(params->script, "none")) {
209 		pid = fork();
210 		if (pid == 0) {
211 			execl(params->script, params->script, ndev->tap_name, NULL);
212 			_exit(1);
213 		} else {
214 			waitpid(pid, &status, 0);
215 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
216 				pr_warning("Fail to setup tap by %s", params->script);
217 				goto fail;
218 			}
219 		}
220 	} else {
221 		memset(&ifr, 0, sizeof(ifr));
222 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
223 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
224 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
225 		ifr.ifr_addr.sa_family = AF_INET;
226 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
227 			pr_warning("Could not set ip address on tap device");
228 			goto fail;
229 		}
230 	}
231 
232 	memset(&ifr, 0, sizeof(ifr));
233 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
234 	ioctl(sock, SIOCGIFFLAGS, &ifr);
235 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
236 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
237 		pr_warning("Could not bring tap device up");
238 
239 	close(sock);
240 
241 	return 1;
242 
243 fail:
244 	if (sock >= 0)
245 		close(sock);
246 	if (ndev->tap_fd >= 0)
247 		close(ndev->tap_fd);
248 
249 	return 0;
250 }
251 
252 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
253 {
254 	pthread_mutex_init(&ndev->io_tx_lock, NULL);
255 	pthread_mutex_init(&ndev->io_rx_lock, NULL);
256 
257 	pthread_cond_init(&ndev->io_tx_cond, NULL);
258 	pthread_cond_init(&ndev->io_rx_cond, NULL);
259 
260 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
261 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
262 }
263 
264 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
265 {
266 	return writev(ndev->tap_fd, iov, out);
267 }
268 
269 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
270 {
271 	return readv(ndev->tap_fd, iov, in);
272 }
273 
274 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
275 {
276 	return uip_tx(iov, out, &ndev->info);
277 }
278 
279 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
280 {
281 	return uip_rx(iov, in, &ndev->info);
282 }
283 
284 static struct net_dev_operations tap_ops = {
285 	.rx	= tap_ops_rx,
286 	.tx	= tap_ops_tx,
287 };
288 
289 static struct net_dev_operations uip_ops = {
290 	.rx	= uip_ops_rx,
291 	.tx	= uip_ops_tx,
292 };
293 
294 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
295 {
296 	struct net_dev *ndev = dev;
297 
298 	((u8 *)(&ndev->config))[offset] = data;
299 }
300 
301 static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
302 {
303 	struct net_dev *ndev = dev;
304 
305 	return ((u8 *)(&ndev->config))[offset];
306 }
307 
308 static u32 get_host_features(struct kvm *kvm, void *dev)
309 {
310 	return 1UL << VIRTIO_NET_F_MAC
311 		| 1UL << VIRTIO_NET_F_CSUM
312 		| 1UL << VIRTIO_NET_F_HOST_UFO
313 		| 1UL << VIRTIO_NET_F_HOST_TSO4
314 		| 1UL << VIRTIO_NET_F_HOST_TSO6
315 		| 1UL << VIRTIO_NET_F_GUEST_UFO
316 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
317 		| 1UL << VIRTIO_NET_F_GUEST_TSO6;
318 }
319 
320 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
321 {
322 	struct net_dev *ndev = dev;
323 
324 	ndev->features = features;
325 }
326 
/*
 * Called when the guest programs a queue's ring PFN: map the ring into
 * host memory and, when vhost-net is active, hand the ring over to the
 * kernel (size, base index, then the three vring addresses — vhost
 * expects this ordering).
 */
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	/* The guest driver is initializing us; drop the "not detected" warning */
	compat__remove_message(compat_id);

	queue			= &ndev->vqs[vq];
	queue->pfn		= pfn;
	p			= guest_pfn_to_host(kvm, queue->pfn);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);

	/* No vhost: the userspace I/O threads service this ring directly */
	if (ndev->vhost_fd == 0)
		return 0;

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	/* Fresh ring: vhost starts consuming from avail index 0 */
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	/* Userspace (host-virtual) addresses of the three vring regions */
	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}
369 
370 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
371 {
372 	struct net_dev *ndev = dev;
373 	struct kvm_irqfd irq;
374 	struct vhost_vring_file file;
375 	int r;
376 
377 	if (ndev->vhost_fd == 0)
378 		return;
379 
380 	irq = (struct kvm_irqfd) {
381 		.gsi	= gsi,
382 		.fd	= eventfd(0, 0),
383 	};
384 	file = (struct vhost_vring_file) {
385 		.index	= vq,
386 		.fd	= irq.fd,
387 	};
388 
389 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
390 	if (r < 0)
391 		die_perror("KVM_IRQFD failed");
392 
393 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
394 	if (r < 0)
395 		die_perror("VHOST_SET_VRING_CALL failed");
396 	file.fd = ndev->tap_fd;
397 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
398 	if (r != 0)
399 		die("VHOST_NET_SET_BACKEND failed %d", errno);
400 
401 }
402 
403 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
404 {
405 	struct net_dev *ndev = dev;
406 	struct vhost_vring_file file = {
407 		.index	= vq,
408 		.fd	= efd,
409 	};
410 	int r;
411 
412 	if (ndev->vhost_fd == 0)
413 		return;
414 
415 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
416 	if (r < 0)
417 		die_perror("VHOST_SET_VRING_KICK failed");
418 }
419 
420 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
421 {
422 	struct net_dev *ndev = dev;
423 
424 	virtio_net_handle_callback(kvm, ndev, vq);
425 
426 	return 0;
427 }
428 
429 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
430 {
431 	struct net_dev *ndev = dev;
432 
433 	return ndev->vqs[vq].pfn;
434 }
435 
/* Every virtio-net queue uses the same compile-time ring size. */
static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	return VIRTIO_NET_QUEUE_SIZE;
}
440 
441 static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
442 	.set_config		= set_config,
443 	.get_config		= get_config,
444 	.get_host_features	= get_host_features,
445 	.set_guest_features	= set_guest_features,
446 	.init_vq		= init_vq,
447 	.notify_vq		= notify_vq,
448 	.get_pfn_vq		= get_pfn_vq,
449 	.get_size_vq		= get_size_vq,
450 	.notify_vq_gsi		= notify_vq_gsi,
451 	.notify_vq_eventfd	= notify_vq_eventfd,
452 };
453 
454 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
455 {
456 	u64 features = 0;
457 	struct vhost_memory *mem;
458 	int r;
459 
460 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
461 	if (ndev->vhost_fd < 0)
462 		die_perror("Failed openning vhost-net device");
463 
464 	mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region));
465 	if (mem == NULL)
466 		die("Failed allocating memory for vhost memory map");
467 
468 	mem->nregions = 1;
469 	mem->regions[0] = (struct vhost_memory_region) {
470 		.guest_phys_addr	= 0,
471 		.memory_size		= kvm->ram_size,
472 		.userspace_addr		= (u64)kvm->ram_start,
473 	};
474 
475 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
476 	if (r != 0)
477 		die_perror("VHOST_SET_OWNER failed");
478 
479 	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
480 	if (r != 0)
481 		die_perror("VHOST_SET_FEATURES failed");
482 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
483 	if (r != 0)
484 		die_perror("VHOST_SET_MEM_TABLE failed");
485 	free(mem);
486 }
487 
488 void virtio_net__init(const struct virtio_net_params *params)
489 {
490 	int i;
491 	struct net_dev *ndev;
492 
493 	if (!params)
494 		return;
495 
496 	ndev = calloc(1, sizeof(struct net_dev));
497 	if (ndev == NULL)
498 		die("Failed allocating ndev");
499 
500 	list_add_tail(&ndev->list, &ndevs);
501 
502 	ndev->kvm = params->kvm;
503 
504 	mutex_init(&ndev->mutex);
505 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
506 
507 	for (i = 0 ; i < 6 ; i++) {
508 		ndev->config.mac[i]		= params->guest_mac[i];
509 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
510 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
511 	}
512 
513 	ndev->mode = params->mode;
514 	if (ndev->mode == NET_MODE_TAP) {
515 		if (!virtio_net__tap_init(params, ndev))
516 			die_perror("You have requested a TAP device, but creation of one has"
517 					"failed because:");
518 		ndev->ops = &tap_ops;
519 	} else {
520 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
521 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
522 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
523 		ndev->info.buf_nr		= 20,
524 		uip_init(&ndev->info);
525 		ndev->ops = &uip_ops;
526 	}
527 
528 	virtio_trans_init(&ndev->vtrans, VIRTIO_PCI);
529 	ndev->vtrans.trans_ops->init(kvm, &ndev->vtrans, ndev, PCI_DEVICE_ID_VIRTIO_NET,
530 					VIRTIO_ID_NET, PCI_CLASS_NET);
531 	ndev->vtrans.virtio_ops = &net_dev_virtio_ops;
532 
533 	if (params->vhost)
534 		virtio_net__vhost_init(params->kvm, ndev);
535 	else
536 		virtio_net__io_thread_init(params->kvm, ndev);
537 
538 	if (compat_id != -1)
539 		compat_id = compat__add_message("virtio-net device was not detected",
540 						"While you have requested a virtio-net device, "
541 						"the guest kernel did not initialize it.\n"
542 						"Please make sure that the guest kernel was "
543 						"compiled with CONFIG_VIRTIO_NET=y enabled "
544 						"in its .config");
545 }
546