xref: /kvmtool/virtio/net.c (revision 507e02d818ab3f0f53480fbf47c9a8da34224448)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/types.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
7 #include "kvm/kvm.h"
8 #include "kvm/irq.h"
9 #include "kvm/uip.h"
10 #include "kvm/guest_compat.h"
11 #include "kvm/virtio-pci.h"
12 
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <assert.h>
22 #include <fcntl.h>
23 
24 #include <sys/socket.h>
25 #include <sys/ioctl.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 
/*
 * Ring size used for every virtqueue of this device; the I/O threads also
 * size their on-stack iovec arrays with it.
 */
#define VIRTIO_NET_QUEUE_SIZE 128

/* Number of entries in net_dev.vqs[]: one receive and one transmit queue. */
#define VIRTIO_NET_NUM_QUEUES 2

/* Indices of the two queues inside net_dev.vqs[]. */
#define VIRTIO_NET_RX_QUEUE 0
#define VIRTIO_NET_TX_QUEUE 1
33 
34 struct net_dev;
35 
36 extern struct kvm *kvm;
37 
38 struct net_dev_operations {
39 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
40 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
41 };
42 
43 struct net_dev {
44 	pthread_mutex_t			mutex;
45 	struct virtio_pci		vpci;
46 	struct list_head		list;
47 
48 	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
49 	struct virtio_net_config	config;
50 	u32				features;
51 
52 	pthread_t			io_rx_thread;
53 	pthread_mutex_t			io_rx_lock;
54 	pthread_cond_t			io_rx_cond;
55 
56 	pthread_t			io_tx_thread;
57 	pthread_mutex_t			io_tx_lock;
58 	pthread_cond_t			io_tx_cond;
59 
60 	int				tap_fd;
61 	char				tap_name[IFNAMSIZ];
62 
63 	int				mode;
64 
65 	struct uip_info			info;
66 	struct net_dev_operations	*ops;
67 	struct kvm			*kvm;
68 };
69 
/* Every device allocated by virtio_net__init() is appended to this list. */
static LIST_HEAD(ndevs);

/*
 * Identifier handed to compat__remove_message() when the guest sets up a
 * queue; starts out as -1.
 */
static int compat_id = -1;
72 
73 static void *virtio_net_rx_thread(void *p)
74 {
75 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
76 	struct virt_queue *vq;
77 	struct kvm *kvm;
78 	struct net_dev *ndev = p;
79 	u16 out, in;
80 	u16 head;
81 	int len;
82 
83 	kvm	= ndev->kvm;
84 	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
85 
86 	while (1) {
87 
88 		mutex_lock(&ndev->io_rx_lock);
89 		if (!virt_queue__available(vq))
90 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
91 		mutex_unlock(&ndev->io_rx_lock);
92 
93 		while (virt_queue__available(vq)) {
94 
95 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
96 
97 			len = ndev->ops->rx(iov, in, ndev);
98 
99 			virt_queue__set_used_elem(vq, head, len);
100 
101 			/* We should interrupt guest right now, otherwise latency is huge. */
102 			virtio_pci__signal_vq(kvm, &ndev->vpci, VIRTIO_NET_RX_QUEUE);
103 		}
104 
105 	}
106 
107 	pthread_exit(NULL);
108 	return NULL;
109 
110 }
111 
112 static void *virtio_net_tx_thread(void *p)
113 {
114 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
115 	struct virt_queue *vq;
116 	struct kvm *kvm;
117 	struct net_dev *ndev = p;
118 	u16 out, in;
119 	u16 head;
120 	int len;
121 
122 	kvm	= ndev->kvm;
123 	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
124 
125 	while (1) {
126 		mutex_lock(&ndev->io_tx_lock);
127 		if (!virt_queue__available(vq))
128 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
129 		mutex_unlock(&ndev->io_tx_lock);
130 
131 		while (virt_queue__available(vq)) {
132 
133 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
134 
135 			len = ndev->ops->tx(iov, out, ndev);
136 
137 			virt_queue__set_used_elem(vq, head, len);
138 		}
139 
140 		virtio_pci__signal_vq(kvm, &ndev->vpci, VIRTIO_NET_TX_QUEUE);
141 	}
142 
143 	pthread_exit(NULL);
144 
145 	return NULL;
146 
147 }
148 
149 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
150 {
151 	switch (queue) {
152 	case VIRTIO_NET_TX_QUEUE:
153 		mutex_lock(&ndev->io_tx_lock);
154 		pthread_cond_signal(&ndev->io_tx_cond);
155 		mutex_unlock(&ndev->io_tx_lock);
156 		break;
157 	case VIRTIO_NET_RX_QUEUE:
158 		mutex_lock(&ndev->io_rx_lock);
159 		pthread_cond_signal(&ndev->io_rx_cond);
160 		mutex_unlock(&ndev->io_rx_lock);
161 		break;
162 	default:
163 		pr_warning("Unknown queue index %u", queue);
164 	}
165 }
166 
167 static bool virtio_net__tap_init(const struct virtio_net_params *params,
168 					struct net_dev *ndev)
169 {
170 	int sock = socket(AF_INET, SOCK_STREAM, 0);
171 	int pid, status, offload, hdr_len;
172 	struct sockaddr_in sin = {0};
173 	struct ifreq ifr;
174 
175 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
176 	if (ndev->tap_fd < 0) {
177 		pr_warning("Unable to open /dev/net/tun");
178 		goto fail;
179 	}
180 
181 	memset(&ifr, 0, sizeof(ifr));
182 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
183 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
184 		pr_warning("Config tap device error. Are you root?");
185 		goto fail;
186 	}
187 
188 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
189 
190 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
191 		pr_warning("Config tap device TUNSETNOCSUM error");
192 		goto fail;
193 	}
194 
195 	hdr_len = sizeof(struct virtio_net_hdr);
196 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
197 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
198 
199 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
200 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
201 		pr_warning("Config tap device TUNSETOFFLOAD error");
202 		goto fail;
203 	}
204 
205 	if (strcmp(params->script, "none")) {
206 		pid = fork();
207 		if (pid == 0) {
208 			execl(params->script, params->script, ndev->tap_name, NULL);
209 			_exit(1);
210 		} else {
211 			waitpid(pid, &status, 0);
212 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
213 				pr_warning("Fail to setup tap by %s", params->script);
214 				goto fail;
215 			}
216 		}
217 	} else {
218 		memset(&ifr, 0, sizeof(ifr));
219 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
220 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
221 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
222 		ifr.ifr_addr.sa_family = AF_INET;
223 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
224 			pr_warning("Could not set ip address on tap device");
225 			goto fail;
226 		}
227 	}
228 
229 	memset(&ifr, 0, sizeof(ifr));
230 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
231 	ioctl(sock, SIOCGIFFLAGS, &ifr);
232 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
233 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
234 		pr_warning("Could not bring tap device up");
235 
236 	close(sock);
237 
238 	return 1;
239 
240 fail:
241 	if (sock >= 0)
242 		close(sock);
243 	if (ndev->tap_fd >= 0)
244 		close(ndev->tap_fd);
245 
246 	return 0;
247 }
248 
249 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
250 {
251 	pthread_mutex_init(&ndev->io_rx_lock, NULL);
252 	pthread_cond_init(&ndev->io_tx_cond, NULL);
253 
254 	pthread_mutex_init(&ndev->io_rx_lock, NULL);
255 	pthread_cond_init(&ndev->io_tx_cond, NULL);
256 
257 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
258 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
259 }
260 
261 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
262 {
263 	return writev(ndev->tap_fd, iov, out);
264 }
265 
266 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
267 {
268 	return readv(ndev->tap_fd, iov, in);
269 }
270 
271 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
272 {
273 	return uip_tx(iov, out, &ndev->info);
274 }
275 
276 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
277 {
278 	return uip_rx(iov, in, &ndev->info);
279 }
280 
281 static struct net_dev_operations tap_ops = {
282 	.rx	= tap_ops_rx,
283 	.tx	= tap_ops_tx,
284 };
285 
286 static struct net_dev_operations uip_ops = {
287 	.rx	= uip_ops_rx,
288 	.tx	= uip_ops_tx,
289 };
290 
291 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
292 {
293 	struct net_dev *ndev = dev;
294 
295 	((u8 *)(&ndev->config))[offset] = data;
296 }
297 
298 static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
299 {
300 	struct net_dev *ndev = dev;
301 
302 	return ((u8 *)(&ndev->config))[offset];
303 }
304 
305 static u32 get_host_features(struct kvm *kvm, void *dev)
306 {
307 	return 1UL << VIRTIO_NET_F_MAC
308 		| 1UL << VIRTIO_NET_F_CSUM
309 		| 1UL << VIRTIO_NET_F_HOST_UFO
310 		| 1UL << VIRTIO_NET_F_HOST_TSO4
311 		| 1UL << VIRTIO_NET_F_HOST_TSO6
312 		| 1UL << VIRTIO_NET_F_GUEST_UFO
313 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
314 		| 1UL << VIRTIO_NET_F_GUEST_TSO6;
315 }
316 
317 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
318 {
319 	struct net_dev *ndev = dev;
320 
321 	ndev->features = features;
322 }
323 
324 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
325 {
326 	struct net_dev *ndev = dev;
327 	struct virt_queue *queue;
328 	void *p;
329 
330 	compat__remove_message(compat_id);
331 
332 	queue			= &ndev->vqs[vq];
333 	queue->pfn		= pfn;
334 	p			= guest_pfn_to_host(kvm, queue->pfn);
335 
336 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
337 
338 	return 0;
339 }
340 
341 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
342 {
343 	struct net_dev *ndev = dev;
344 
345 	virtio_net_handle_callback(kvm, ndev, vq);
346 
347 	return 0;
348 }
349 
350 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
351 {
352 	struct net_dev *ndev = dev;
353 
354 	return ndev->vqs[vq].pfn;
355 }
356 
357 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
358 {
359 	return VIRTIO_NET_QUEUE_SIZE;
360 }
361 
362 void virtio_net__init(const struct virtio_net_params *params)
363 {
364 	int i;
365 	struct net_dev *ndev;
366 
367 	if (!params)
368 		return;
369 
370 	ndev = calloc(1, sizeof(struct net_dev));
371 	if (ndev == NULL)
372 		die("Failed allocating ndev");
373 
374 	list_add_tail(&ndev->list, &ndevs);
375 
376 	ndev->kvm = params->kvm;
377 
378 	mutex_init(&ndev->mutex);
379 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
380 
381 	for (i = 0 ; i < 6 ; i++) {
382 		ndev->config.mac[i]		= params->guest_mac[i];
383 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
384 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
385 	}
386 
387 	ndev->mode = params->mode;
388 	if (ndev->mode == NET_MODE_TAP) {
389 		if (!virtio_net__tap_init(params, ndev))
390 			die_perror("You have requested a TAP device, but creation of one has"
391 					"failed because:");
392 		ndev->ops = &tap_ops;
393 	} else {
394 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
395 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
396 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
397 		ndev->info.buf_nr		= 20,
398 		uip_init(&ndev->info);
399 		ndev->ops = &uip_ops;
400 	}
401 
402 	virtio_pci__init(kvm, &ndev->vpci, ndev, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
403 	ndev->vpci.ops = (struct virtio_pci_ops) {
404 		.set_config		= set_config,
405 		.get_config		= get_config,
406 		.get_host_features	= get_host_features,
407 		.set_guest_features	= set_guest_features,
408 		.init_vq		= init_vq,
409 		.notify_vq		= notify_vq,
410 		.get_pfn_vq		= get_pfn_vq,
411 		.get_size_vq		= get_size_vq,
412 	};
413 
414 	virtio_net__io_thread_init(params->kvm, ndev);
415 
416 	if (compat_id != -1)
417 		compat_id = compat__add_message("virtio-net device was not detected",
418 						"While you have requested a virtio-net device, "
419 						"the guest kernel did not initialize it.\n"
420 						"Please make sure that the guest kernel was "
421 						"compiled with CONFIG_VIRTIO_NET=y enabled "
422 						"in its .config");
423 }
424