xref: /kvmtool/virtio/net.c (revision 1599d7242db63a7742d5a526cd9476357d7e1e92)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/types.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
7 #include "kvm/kvm.h"
8 #include "kvm/irq.h"
9 #include "kvm/uip.h"
10 #include "kvm/guest_compat.h"
11 #include "kvm/virtio-pci.h"
12 
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <assert.h>
22 #include <fcntl.h>
23 
24 #include <sys/socket.h>
25 #include <sys/ioctl.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 
/* Ring size handed to vring_init() and reported by get_size_vq(). */
#define VIRTIO_NET_QUEUE_SIZE		128

/* Indices into net_dev.vqs[]: one receive queue and one transmit queue. */
#define VIRTIO_NET_RX_QUEUE		0
#define VIRTIO_NET_TX_QUEUE		1
#define VIRTIO_NET_NUM_QUEUES		2
33 
34 struct net_dev;
35 
36 extern struct kvm *kvm;
37 
38 struct net_dev_operations {
39 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
40 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
41 };
42 
43 struct net_dev {
44 	pthread_mutex_t			mutex;
45 	struct virtio_pci		vpci;
46 
47 	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
48 	struct virtio_net_config	config;
49 	u32				features;
50 	int				compat_id;
51 
52 	pthread_t			io_rx_thread;
53 	pthread_mutex_t			io_rx_lock;
54 	pthread_cond_t			io_rx_cond;
55 
56 	pthread_t			io_tx_thread;
57 	pthread_mutex_t			io_tx_lock;
58 	pthread_cond_t			io_tx_cond;
59 
60 	int				tap_fd;
61 	char				tap_name[IFNAMSIZ];
62 
63 	int				mode;
64 
65 	struct uip_info			info;
66 	struct net_dev_operations	*ops;
67 };
68 
69 static struct net_dev ndev = {
70 	.mutex	= PTHREAD_MUTEX_INITIALIZER,
71 
72 	.config = {
73 		.status			= VIRTIO_NET_S_LINK_UP,
74 	},
75 	.info = {
76 		.buf_nr			= 20,
77 	}
78 };
79 
80 static void *virtio_net_rx_thread(void *p)
81 {
82 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
83 	struct virt_queue *vq;
84 	struct kvm *kvm;
85 	u16 out, in;
86 	u16 head;
87 	int len;
88 
89 	kvm	= p;
90 	vq	= &ndev.vqs[VIRTIO_NET_RX_QUEUE];
91 
92 	while (1) {
93 
94 		mutex_lock(&ndev.io_rx_lock);
95 		if (!virt_queue__available(vq))
96 			pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock);
97 		mutex_unlock(&ndev.io_rx_lock);
98 
99 		while (virt_queue__available(vq)) {
100 
101 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
102 
103 			len = ndev.ops->rx(iov, in, &ndev);
104 
105 			virt_queue__set_used_elem(vq, head, len);
106 
107 			/* We should interrupt guest right now, otherwise latency is huge. */
108 			virtio_pci__signal_vq(kvm, &ndev.vpci, VIRTIO_NET_RX_QUEUE);
109 		}
110 
111 	}
112 
113 	pthread_exit(NULL);
114 	return NULL;
115 
116 }
117 
118 static void *virtio_net_tx_thread(void *p)
119 {
120 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
121 	struct virt_queue *vq;
122 	struct kvm *kvm;
123 	u16 out, in;
124 	u16 head;
125 	int len;
126 
127 	kvm	= p;
128 	vq	= &ndev.vqs[VIRTIO_NET_TX_QUEUE];
129 
130 	while (1) {
131 		mutex_lock(&ndev.io_tx_lock);
132 		if (!virt_queue__available(vq))
133 			pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock);
134 		mutex_unlock(&ndev.io_tx_lock);
135 
136 		while (virt_queue__available(vq)) {
137 
138 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
139 
140 			len = ndev.ops->tx(iov, out, &ndev);
141 
142 			virt_queue__set_used_elem(vq, head, len);
143 		}
144 
145 		virtio_pci__signal_vq(kvm, &ndev.vpci, VIRTIO_NET_TX_QUEUE);
146 	}
147 
148 	pthread_exit(NULL);
149 
150 	return NULL;
151 
152 }
153 
154 static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index)
155 {
156 	switch (queue_index) {
157 	case VIRTIO_NET_TX_QUEUE:
158 		mutex_lock(&ndev.io_tx_lock);
159 		pthread_cond_signal(&ndev.io_tx_cond);
160 		mutex_unlock(&ndev.io_tx_lock);
161 		break;
162 	case VIRTIO_NET_RX_QUEUE:
163 		mutex_lock(&ndev.io_rx_lock);
164 		pthread_cond_signal(&ndev.io_rx_cond);
165 		mutex_unlock(&ndev.io_rx_lock);
166 		break;
167 	default:
168 		pr_warning("Unknown queue index %u", queue_index);
169 	}
170 }
171 
172 static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
173 {
174 	int sock = socket(AF_INET, SOCK_STREAM, 0);
175 	int pid, status, offload, hdr_len;
176 	struct sockaddr_in sin = {0};
177 	struct ifreq ifr;
178 
179 	ndev.tap_fd = open("/dev/net/tun", O_RDWR);
180 	if (ndev.tap_fd < 0) {
181 		pr_warning("Unable to open /dev/net/tun");
182 		goto fail;
183 	}
184 
185 	memset(&ifr, 0, sizeof(ifr));
186 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
187 	if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) {
188 		pr_warning("Config tap device error. Are you root?");
189 		goto fail;
190 	}
191 
192 	strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name));
193 
194 	if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) {
195 		pr_warning("Config tap device TUNSETNOCSUM error");
196 		goto fail;
197 	}
198 
199 	hdr_len = sizeof(struct virtio_net_hdr);
200 	if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) {
201 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
202 	}
203 
204 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
205 	if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) {
206 		pr_warning("Config tap device TUNSETOFFLOAD error");
207 		goto fail;
208 	}
209 
210 	if (strcmp(params->script, "none")) {
211 		pid = fork();
212 		if (pid == 0) {
213 			execl(params->script, params->script, ndev.tap_name, NULL);
214 			_exit(1);
215 		} else {
216 			waitpid(pid, &status, 0);
217 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
218 				pr_warning("Fail to setup tap by %s", params->script);
219 				goto fail;
220 			}
221 		}
222 	} else {
223 		memset(&ifr, 0, sizeof(ifr));
224 		strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name));
225 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
226 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
227 		ifr.ifr_addr.sa_family = AF_INET;
228 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
229 			pr_warning("Could not set ip address on tap device");
230 			goto fail;
231 		}
232 	}
233 
234 	memset(&ifr, 0, sizeof(ifr));
235 	strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name));
236 	ioctl(sock, SIOCGIFFLAGS, &ifr);
237 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
238 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
239 		pr_warning("Could not bring tap device up");
240 
241 	close(sock);
242 
243 	return 1;
244 
245 fail:
246 	if (sock >= 0)
247 		close(sock);
248 	if (ndev.tap_fd >= 0)
249 		close(ndev.tap_fd);
250 
251 	return 0;
252 }
253 
254 static void virtio_net__io_thread_init(struct kvm *kvm)
255 {
256 	pthread_mutex_init(&ndev.io_rx_lock, NULL);
257 	pthread_cond_init(&ndev.io_tx_cond, NULL);
258 
259 	pthread_mutex_init(&ndev.io_rx_lock, NULL);
260 	pthread_cond_init(&ndev.io_tx_cond, NULL);
261 
262 	pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm);
263 	pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm);
264 }
265 
266 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
267 {
268 	return writev(ndev->tap_fd, iov, out);
269 }
270 
271 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
272 {
273 	return readv(ndev->tap_fd, iov, in);
274 }
275 
276 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
277 {
278 	return uip_tx(iov, out, &ndev->info);
279 }
280 
281 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
282 {
283 	return uip_rx(iov, in, &ndev->info);
284 }
285 
286 static struct net_dev_operations tap_ops = {
287 	.rx	= tap_ops_rx,
288 	.tx	= tap_ops_tx,
289 };
290 
291 static struct net_dev_operations uip_ops = {
292 	.rx	= uip_ops_rx,
293 	.tx	= uip_ops_tx,
294 };
295 
296 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
297 {
298 	struct net_dev *ndev = dev;
299 
300 	((u8 *)(&ndev->config))[offset] = data;
301 }
302 
303 static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
304 {
305 	struct net_dev *ndev = dev;
306 
307 	return ((u8 *)(&ndev->config))[offset];
308 }
309 
310 static u32 get_host_features(struct kvm *kvm, void *dev)
311 {
312 	return 1UL << VIRTIO_NET_F_MAC
313 		| 1UL << VIRTIO_NET_F_CSUM
314 		| 1UL << VIRTIO_NET_F_HOST_UFO
315 		| 1UL << VIRTIO_NET_F_HOST_TSO4
316 		| 1UL << VIRTIO_NET_F_HOST_TSO6
317 		| 1UL << VIRTIO_NET_F_GUEST_UFO
318 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
319 		| 1UL << VIRTIO_NET_F_GUEST_TSO6;
320 }
321 
322 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
323 {
324 	struct net_dev *ndev = dev;
325 
326 	ndev->features = features;
327 }
328 
329 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
330 {
331 	struct net_dev *ndev = dev;
332 	struct virt_queue *queue;
333 	void *p;
334 
335 	compat__remove_message(ndev->compat_id);
336 
337 	queue			= &ndev->vqs[vq];
338 	queue->pfn		= pfn;
339 	p			= guest_pfn_to_host(kvm, queue->pfn);
340 
341 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
342 
343 	return 0;
344 }
345 
346 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
347 {
348 	virtio_net_handle_callback(kvm, vq);
349 
350 	return 0;
351 }
352 
353 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
354 {
355 	struct net_dev *ndev = dev;
356 
357 	return ndev->vqs[vq].pfn;
358 }
359 
360 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
361 {
362 	return VIRTIO_NET_QUEUE_SIZE;
363 }
364 
365 void virtio_net__init(const struct virtio_net_parameters *params)
366 {
367 	int i;
368 
369 	for (i = 0 ; i < 6 ; i++) {
370 		ndev.config.mac[i]		= params->guest_mac[i];
371 		ndev.info.guest_mac.addr[i]	= params->guest_mac[i];
372 		ndev.info.host_mac.addr[i]	= params->host_mac[i];
373 	}
374 
375 	ndev.mode = params->mode;
376 	if (ndev.mode == NET_MODE_TAP) {
377 		virtio_net__tap_init(params);
378 		ndev.ops = &tap_ops;
379 	} else {
380 		ndev.info.host_ip		= ntohl(inet_addr(params->host_ip));
381 		ndev.info.guest_ip		= ntohl(inet_addr(params->guest_ip));
382 		ndev.info.guest_netmask		= ntohl(inet_addr("255.255.255.0"));
383 		uip_init(&ndev.info);
384 		ndev.ops = &uip_ops;
385 	}
386 
387 	virtio_pci__init(kvm, &ndev.vpci, &ndev, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET);
388 	ndev.vpci.ops = (struct virtio_pci_ops) {
389 		.set_config		= set_config,
390 		.get_config		= get_config,
391 		.get_host_features	= get_host_features,
392 		.set_guest_features	= set_guest_features,
393 		.init_vq		= init_vq,
394 		.notify_vq		= notify_vq,
395 		.get_pfn_vq		= get_pfn_vq,
396 		.get_size_vq		= get_size_vq,
397 	};
398 
399 	virtio_net__io_thread_init(params->kvm);
400 
401 	ndev.compat_id = compat__add_message("virtio-net device was not detected",
402 						"While you have requested a virtio-net device, "
403 						"the guest kernel didn't seem to detect it.\n"
404 						"Please make sure that the kernel was compiled"
405 						"with CONFIG_VIRTIO_NET.");
406 }
407