xref: /kvmtool/virtio/net.c (revision f19edd1e9832f14334e7deb13adccbb1928d4124)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/types.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
7 #include "kvm/kvm.h"
8 #include "kvm/irq.h"
9 #include "kvm/uip.h"
10 #include "kvm/guest_compat.h"
11 #include "kvm/virtio-trans.h"
12 
13 #include <linux/vhost.h>
14 #include <linux/virtio_net.h>
15 #include <linux/if_tun.h>
16 #include <linux/types.h>
17 
18 #include <arpa/inet.h>
19 #include <net/if.h>
20 
21 #include <unistd.h>
22 #include <assert.h>
23 #include <fcntl.h>
24 
25 #include <sys/socket.h>
26 #include <sys/ioctl.h>
27 #include <sys/types.h>
28 #include <sys/wait.h>
29 #include <sys/eventfd.h>
30 
/* Ring size used for every virtqueue of this device (see init_vq()). */
#define VIRTIO_NET_QUEUE_SIZE		128
/* Number of entries in net_dev.vqs[]. */
#define VIRTIO_NET_NUM_QUEUES		2
/* Index of the receive queue in net_dev.vqs[]. */
#define VIRTIO_NET_RX_QUEUE		0
/* Index of the transmit queue in net_dev.vqs[]. */
#define VIRTIO_NET_TX_QUEUE		1
35 
36 struct net_dev;
37 
38 extern struct kvm *kvm;
39 
40 struct net_dev_operations {
41 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
42 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
43 };
44 
/* Per-device state of one emulated virtio-net device. */
struct net_dev {
	/* Initialized in virtio_net__init(); no other user is visible in this file. */
	pthread_mutex_t			mutex;
	/* Transport state; set up as VIRTIO_PCI in virtio_net__init(). */
	struct virtio_trans		vtrans;
	/* Link in the file-scope 'ndevs' list. */
	struct list_head		list;

	/* Virtqueues, indexed by VIRTIO_NET_RX_QUEUE / VIRTIO_NET_TX_QUEUE. */
	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
	/* Device config space, accessed bytewise via get_config()/set_config(). */
	struct virtio_net_config	config;
	/* Value last passed to set_guest_features(). */
	u32				features;

	/* RX worker thread and the lock/condition used to wake it. */
	pthread_t			io_rx_thread;
	pthread_mutex_t			io_rx_lock;
	pthread_cond_t			io_rx_cond;

	/* TX worker thread and the lock/condition used to wake it. */
	pthread_t			io_tx_thread;
	pthread_mutex_t			io_tx_lock;
	pthread_cond_t			io_tx_cond;

	/* fd of /dev/vhost-net; 0 is treated as "vhost not in use". */
	int				vhost_fd;
	/* fd of the TAP device (opened here or supplied via params->fd). */
	int				tap_fd;
	/* Interface name reported back by TUNSETIFF. */
	char				tap_name[IFNAMSIZ];

	/* Copied from params->mode; NET_MODE_TAP selects the TAP backend. */
	int				mode;

	/* State of the uip backend, used when mode is not NET_MODE_TAP. */
	struct uip_info			info;
	/* Backend callbacks (tap_ops or uip_ops). */
	struct net_dev_operations	*ops;
	/* Owning VM, copied from params->kvm. */
	struct kvm			*kvm;
};
72 
/* All net devices created by virtio_net__init(). */
static LIST_HEAD(ndevs);
/*
 * Handle for the "device was not detected" compat message; starts at -1.
 * Passed to compat__remove_message() once the guest sets up a queue.
 */
static int compat_id = -1;
75 
76 static void *virtio_net_rx_thread(void *p)
77 {
78 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
79 	struct virt_queue *vq;
80 	struct kvm *kvm;
81 	struct net_dev *ndev = p;
82 	u16 out, in;
83 	u16 head;
84 	int len;
85 
86 	kvm	= ndev->kvm;
87 	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
88 
89 	while (1) {
90 
91 		mutex_lock(&ndev->io_rx_lock);
92 		if (!virt_queue__available(vq))
93 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
94 		mutex_unlock(&ndev->io_rx_lock);
95 
96 		while (virt_queue__available(vq)) {
97 
98 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
99 
100 			len = ndev->ops->rx(iov, in, ndev);
101 
102 			virt_queue__set_used_elem(vq, head, len);
103 
104 			/* We should interrupt guest right now, otherwise latency is huge. */
105 			if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE]))
106 				ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans,
107 								VIRTIO_NET_RX_QUEUE);
108 		}
109 
110 	}
111 
112 	pthread_exit(NULL);
113 	return NULL;
114 
115 }
116 
117 static void *virtio_net_tx_thread(void *p)
118 {
119 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
120 	struct virt_queue *vq;
121 	struct kvm *kvm;
122 	struct net_dev *ndev = p;
123 	u16 out, in;
124 	u16 head;
125 	int len;
126 
127 	kvm	= ndev->kvm;
128 	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
129 
130 	while (1) {
131 		mutex_lock(&ndev->io_tx_lock);
132 		if (!virt_queue__available(vq))
133 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
134 		mutex_unlock(&ndev->io_tx_lock);
135 
136 		while (virt_queue__available(vq)) {
137 
138 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
139 
140 			len = ndev->ops->tx(iov, out, ndev);
141 
142 			virt_queue__set_used_elem(vq, head, len);
143 		}
144 
145 		if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE]))
146 			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE);
147 	}
148 
149 	pthread_exit(NULL);
150 
151 	return NULL;
152 
153 }
154 
155 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
156 {
157 	switch (queue) {
158 	case VIRTIO_NET_TX_QUEUE:
159 		mutex_lock(&ndev->io_tx_lock);
160 		pthread_cond_signal(&ndev->io_tx_cond);
161 		mutex_unlock(&ndev->io_tx_lock);
162 		break;
163 	case VIRTIO_NET_RX_QUEUE:
164 		mutex_lock(&ndev->io_rx_lock);
165 		pthread_cond_signal(&ndev->io_rx_cond);
166 		mutex_unlock(&ndev->io_rx_lock);
167 		break;
168 	default:
169 		pr_warning("Unknown queue index %u", queue);
170 	}
171 }
172 
173 static bool virtio_net__tap_init(const struct virtio_net_params *params,
174 					struct net_dev *ndev)
175 {
176 	int sock = socket(AF_INET, SOCK_STREAM, 0);
177 	int pid, status, offload, hdr_len;
178 	struct sockaddr_in sin = {0};
179 	struct ifreq ifr;
180 
181 	/* Did the user already gave us the FD? */
182 	if (params->fd) {
183 		ndev->tap_fd = params->fd;
184 		return 1;
185 	}
186 
187 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
188 	if (ndev->tap_fd < 0) {
189 		pr_warning("Unable to open /dev/net/tun");
190 		goto fail;
191 	}
192 
193 	memset(&ifr, 0, sizeof(ifr));
194 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
195 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
196 		pr_warning("Config tap device error. Are you root?");
197 		goto fail;
198 	}
199 
200 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
201 
202 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
203 		pr_warning("Config tap device TUNSETNOCSUM error");
204 		goto fail;
205 	}
206 
207 	hdr_len = sizeof(struct virtio_net_hdr);
208 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
209 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
210 
211 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
212 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
213 		pr_warning("Config tap device TUNSETOFFLOAD error");
214 		goto fail;
215 	}
216 
217 	if (strcmp(params->script, "none")) {
218 		pid = fork();
219 		if (pid == 0) {
220 			execl(params->script, params->script, ndev->tap_name, NULL);
221 			_exit(1);
222 		} else {
223 			waitpid(pid, &status, 0);
224 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
225 				pr_warning("Fail to setup tap by %s", params->script);
226 				goto fail;
227 			}
228 		}
229 	} else {
230 		memset(&ifr, 0, sizeof(ifr));
231 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
232 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
233 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
234 		ifr.ifr_addr.sa_family = AF_INET;
235 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
236 			pr_warning("Could not set ip address on tap device");
237 			goto fail;
238 		}
239 	}
240 
241 	memset(&ifr, 0, sizeof(ifr));
242 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
243 	ioctl(sock, SIOCGIFFLAGS, &ifr);
244 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
245 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
246 		pr_warning("Could not bring tap device up");
247 
248 	close(sock);
249 
250 	return 1;
251 
252 fail:
253 	if (sock >= 0)
254 		close(sock);
255 	if (ndev->tap_fd >= 0)
256 		close(ndev->tap_fd);
257 
258 	return 0;
259 }
260 
261 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
262 {
263 	pthread_mutex_init(&ndev->io_tx_lock, NULL);
264 	pthread_mutex_init(&ndev->io_rx_lock, NULL);
265 
266 	pthread_cond_init(&ndev->io_tx_cond, NULL);
267 	pthread_cond_init(&ndev->io_rx_cond, NULL);
268 
269 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
270 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
271 }
272 
273 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
274 {
275 	return writev(ndev->tap_fd, iov, out);
276 }
277 
278 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
279 {
280 	return readv(ndev->tap_fd, iov, in);
281 }
282 
283 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
284 {
285 	return uip_tx(iov, out, &ndev->info);
286 }
287 
288 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
289 {
290 	return uip_rx(iov, in, &ndev->info);
291 }
292 
/* Backend callbacks installed when the device runs in NET_MODE_TAP. */
static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};
297 
/* Backend callbacks installed when the device does not run in NET_MODE_TAP. */
static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};
302 
303 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
304 {
305 	struct net_dev *ndev = dev;
306 
307 	((u8 *)(&ndev->config))[offset] = data;
308 }
309 
310 static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
311 {
312 	struct net_dev *ndev = dev;
313 
314 	return ((u8 *)(&ndev->config))[offset];
315 }
316 
317 static u32 get_host_features(struct kvm *kvm, void *dev)
318 {
319 	return 1UL << VIRTIO_NET_F_MAC
320 		| 1UL << VIRTIO_NET_F_CSUM
321 		| 1UL << VIRTIO_NET_F_HOST_UFO
322 		| 1UL << VIRTIO_NET_F_HOST_TSO4
323 		| 1UL << VIRTIO_NET_F_HOST_TSO6
324 		| 1UL << VIRTIO_NET_F_GUEST_UFO
325 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
326 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
327 		| 1UL << VIRTIO_RING_F_EVENT_IDX
328 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC;
329 }
330 
331 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
332 {
333 	struct net_dev *ndev = dev;
334 
335 	ndev->features = features;
336 }
337 
338 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
339 {
340 	struct vhost_vring_state state = { .index = vq };
341 	struct vhost_vring_addr addr;
342 	struct net_dev *ndev = dev;
343 	struct virt_queue *queue;
344 	void *p;
345 	int r;
346 
347 	compat__remove_message(compat_id);
348 
349 	queue			= &ndev->vqs[vq];
350 	queue->pfn		= pfn;
351 	p			= guest_pfn_to_host(kvm, queue->pfn);
352 
353 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
354 
355 	if (ndev->vhost_fd == 0)
356 		return 0;
357 
358 	state.num = queue->vring.num;
359 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
360 	if (r < 0)
361 		die_perror("VHOST_SET_VRING_NUM failed");
362 	state.num = 0;
363 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
364 	if (r < 0)
365 		die_perror("VHOST_SET_VRING_BASE failed");
366 
367 	addr = (struct vhost_vring_addr) {
368 		.index = vq,
369 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
370 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
371 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
372 	};
373 
374 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
375 	if (r < 0)
376 		die_perror("VHOST_SET_VRING_ADDR failed");
377 
378 	return 0;
379 }
380 
381 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
382 {
383 	struct net_dev *ndev = dev;
384 	struct kvm_irqfd irq;
385 	struct vhost_vring_file file;
386 	int r;
387 
388 	if (ndev->vhost_fd == 0)
389 		return;
390 
391 	irq = (struct kvm_irqfd) {
392 		.gsi	= gsi,
393 		.fd	= eventfd(0, 0),
394 	};
395 	file = (struct vhost_vring_file) {
396 		.index	= vq,
397 		.fd	= irq.fd,
398 	};
399 
400 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
401 	if (r < 0)
402 		die_perror("KVM_IRQFD failed");
403 
404 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
405 	if (r < 0)
406 		die_perror("VHOST_SET_VRING_CALL failed");
407 	file.fd = ndev->tap_fd;
408 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
409 	if (r != 0)
410 		die("VHOST_NET_SET_BACKEND failed %d", errno);
411 
412 }
413 
414 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
415 {
416 	struct net_dev *ndev = dev;
417 	struct vhost_vring_file file = {
418 		.index	= vq,
419 		.fd	= efd,
420 	};
421 	int r;
422 
423 	if (ndev->vhost_fd == 0)
424 		return;
425 
426 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
427 	if (r < 0)
428 		die_perror("VHOST_SET_VRING_KICK failed");
429 }
430 
431 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
432 {
433 	struct net_dev *ndev = dev;
434 
435 	virtio_net_handle_callback(kvm, ndev, vq);
436 
437 	return 0;
438 }
439 
440 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
441 {
442 	struct net_dev *ndev = dev;
443 
444 	return ndev->vqs[vq].pfn;
445 }
446 
447 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
448 {
449 	return VIRTIO_NET_QUEUE_SIZE;
450 }
451 
/*
 * Device callbacks for the virtio-net device; installed into
 * ndev->vtrans.virtio_ops by virtio_net__init().
 */
static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
	.set_config		= set_config,
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.notify_vq		= notify_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};
464 
465 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
466 {
467 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
468 	struct vhost_memory *mem;
469 	int r;
470 
471 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
472 	if (ndev->vhost_fd < 0)
473 		die_perror("Failed openning vhost-net device");
474 
475 	mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region));
476 	if (mem == NULL)
477 		die("Failed allocating memory for vhost memory map");
478 
479 	mem->nregions = 1;
480 	mem->regions[0] = (struct vhost_memory_region) {
481 		.guest_phys_addr	= 0,
482 		.memory_size		= kvm->ram_size,
483 		.userspace_addr		= (unsigned long)kvm->ram_start,
484 	};
485 
486 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
487 	if (r != 0)
488 		die_perror("VHOST_SET_OWNER failed");
489 
490 	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
491 	if (r != 0)
492 		die_perror("VHOST_SET_FEATURES failed");
493 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
494 	if (r != 0)
495 		die_perror("VHOST_SET_MEM_TABLE failed");
496 	free(mem);
497 }
498 
499 void virtio_net__init(const struct virtio_net_params *params)
500 {
501 	int i;
502 	struct net_dev *ndev;
503 
504 	if (!params)
505 		return;
506 
507 	ndev = calloc(1, sizeof(struct net_dev));
508 	if (ndev == NULL)
509 		die("Failed allocating ndev");
510 
511 	list_add_tail(&ndev->list, &ndevs);
512 
513 	ndev->kvm = params->kvm;
514 
515 	mutex_init(&ndev->mutex);
516 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
517 
518 	for (i = 0 ; i < 6 ; i++) {
519 		ndev->config.mac[i]		= params->guest_mac[i];
520 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
521 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
522 	}
523 
524 	ndev->mode = params->mode;
525 	if (ndev->mode == NET_MODE_TAP) {
526 		if (!virtio_net__tap_init(params, ndev))
527 			die_perror("You have requested a TAP device, but creation of one has"
528 					"failed because:");
529 		ndev->ops = &tap_ops;
530 	} else {
531 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
532 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
533 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
534 		ndev->info.buf_nr		= 20,
535 		uip_init(&ndev->info);
536 		ndev->ops = &uip_ops;
537 	}
538 
539 	virtio_trans_init(&ndev->vtrans, VIRTIO_PCI);
540 	ndev->vtrans.trans_ops->init(kvm, &ndev->vtrans, ndev, PCI_DEVICE_ID_VIRTIO_NET,
541 					VIRTIO_ID_NET, PCI_CLASS_NET);
542 	ndev->vtrans.virtio_ops = &net_dev_virtio_ops;
543 
544 	if (params->vhost)
545 		virtio_net__vhost_init(params->kvm, ndev);
546 	else
547 		virtio_net__io_thread_init(params->kvm, ndev);
548 
549 	if (compat_id != -1)
550 		compat_id = compat__add_message("virtio-net device was not detected",
551 						"While you have requested a virtio-net device, "
552 						"the guest kernel did not initialize it.\n"
553 						"Please make sure that the guest kernel was "
554 						"compiled with CONFIG_VIRTIO_NET=y enabled "
555 						"in its .config");
556 }
557