1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/mutex.h"
5 #include "kvm/util.h"
6 #include "kvm/kvm.h"
7 #include "kvm/uip.h"
8 #include "kvm/guest_compat.h"
9 #include "kvm/iovec.h"
10 #include "kvm/strbuf.h"
11
12 #include <linux/list.h>
13 #include <linux/vhost.h>
14 #include <linux/virtio_net.h>
15 #include <linux/if_tun.h>
16 #include <linux/types.h>
17
18 #include <arpa/inet.h>
19 #include <net/if.h>
20
21 #include <unistd.h>
22 #include <fcntl.h>
23
24 #include <sys/socket.h>
25 #include <sys/ioctl.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28
29 #define VIRTIO_NET_QUEUE_SIZE 256
30 #define VIRTIO_NET_NUM_QUEUES 8
31
32 struct net_dev;
33
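/*
 * Backend operations: rx() fills the iovec with one packet received from
 * the backend (tap or the user-mode uip stack), tx() transmits one packet
 * from the iovec. Both return the number of bytes handled, or a negative
 * value on error.
 */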
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
};

struct net_dev_queue {
	int			id;
	struct net_dev		*ndev;
	struct virt_queue	vq;
	pthread_t		thread;
	struct mutex		lock;
	pthread_cond_t		cond;
};

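/*
 * Per-device state. queues[] holds an RX and a TX virtqueue for each
 * queue pair, plus a single control virtqueue at index queue_pairs * 2.
 */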
struct net_dev {
	struct mutex		mutex;
	struct virtio_device	vdev;
	struct list_head	list;

	struct net_dev_queue	queues[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config config;
	u32			queue_pairs;

	int			vhost_fd;
	int			tap_fd;
	char			tap_name[IFNAMSIZ];
	bool			tap_ufo;

	int			mode;

	struct uip_info		info;
	struct net_dev_operations *ops;
	struct kvm		*kvm;

	struct virtio_net_params *params;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

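/* Maximum packet: 64KiB of GSO payload plus a 14-byte Ethernet header. */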
#define MAX_PACKET_SIZE 65550

static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
{
	/* Features are 64 bits wide; avoid a 32-bit shift overflow. */
	return ndev->vdev.features & (1ULL << feature);
}

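/*
 * Modern (virtio 1.0) devices always use the header layout that carries
 * num_buffers; legacy devices only do so when VIRTIO_NET_F_MRG_RXBUF was
 * negotiated.
 */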
static int virtio_net_hdr_len(struct net_dev *ndev)
{
	if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ||
	    !ndev->vdev.legacy)
		return sizeof(struct virtio_net_hdr_mrg_rxbuf);

	return sizeof(struct virtio_net_hdr);
}

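/*
 * RX thread: read one full packet from the backend into a bounce buffer,
 * then scatter it across as many guest buffer chains as needed, setting
 * num_buffers so the guest can reassemble it (mergeable RX buffers).
 */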
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len, copied;

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			u16 num_buffers;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread",
					   __func__, queue->id, len);
				goto out_err;
			}

			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			/*
			 * The device MUST set num_buffers, except in the case
			 * where the legacy driver did not negotiate
			 * VIRTIO_NET_F_MRG_RXBUF and the field does not exist.
			 */
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ||
			    !ndev->vdev.legacy)
				hdr->num_buffers = virtio_host_to_guest_u16(vq->endian, num_buffers);

			virt_queue__used_idx_advance(vq, num_buffers);

			/* Signal the guest right away, otherwise latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

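/*
 * TX thread: hand each chain of guest out-buffers to the backend as a
 * single packet.
 */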
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;

	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)",
					   __func__, queue->id, len);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm,
						struct net_dev *ndev,
						struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

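/*
 * Control queue thread: each request carries a virtio_net_ctrl_hdr in the
 * device-readable buffers; the one-byte ack goes in the device-writable
 * buffer that follows.
 */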
static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	u16 out, in, head;
	struct kvm *kvm = ndev->kvm;
	struct virtio_net_ctrl_hdr ctrl;
	virtio_net_ctrl_ack ack;
	size_t len;

	kvm__set_thread_name("virtio-net-ctrl");

	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			/* The request header sits in the device-readable buffers. */
			len = min(iov_size(iov, out), sizeof(ctrl));
			memcpy_fromiovec((void *)&ctrl, iov, len);

			switch (ctrl.class) {
			case VIRTIO_NET_CTRL_MQ:
				ack = virtio_net_handle_mq(kvm, ndev, &ctrl);
				break;
			default:
				ack = VIRTIO_NET_ERR;
				break;
			}
			/* The ack goes in the device-writable buffers, which follow. */
			memcpy_toiovec(iov + out, &ack, sizeof(ack));
			virt_queue__set_used_elem(vq, head, sizeof(ack));
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
	}

	pthread_exit(NULL);
	return NULL;
}

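/*
 * Wake the thread servicing this queue; it sleeps on the queue's condition
 * variable while the ring is empty.
 */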
static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	struct net_dev_queue *net_queue;

	/* Validate the index before deriving a pointer from it. */
	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	net_queue = &ndev->queues[queue];
	mutex_lock(&net_queue->lock);
	pthread_cond_signal(&net_queue->cond);
	mutex_unlock(&net_queue->lock);
}

static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strlcpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strlcpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

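/*
 * Using vfork() avoids duplicating the page tables of a potentially large
 * guest address space before the child execs the setup script.
 */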
static int virtio_net_exec_script(const char *script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = vfork();
	if (pid == 0) {
		execl(script, script, tap_name, NULL);
		_exit(1);
	} else {
		waitpid(pid, &status, 0);
		if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
			pr_warning("Failed to set up tap device with %s", script);
			return -1;
		}
	}
	return 0;
}

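/*
 * Configure the tap interface: set the vnet header size, then either run
 * the user-supplied script or assign the host IP and bring the interface
 * up directly.
 */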
static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;

	hdr_len = virtio_net_hdr_len(ndev);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Failed to set vnet header size on tap device (TUNSETVNETHDRSZ)");

	if (strcmp(params->script, "none")) {
		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
			goto fail;
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set IP address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

static void virtio_net__tap_exit(struct net_dev *ndev)
{
	int sock;
	struct ifreq ifr;

	if (ndev->params->tapif)
		return;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
	ioctl(sock, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		pr_warning("Could not bring tap device down");
	close(sock);
}

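/*
 * Open the tap (or macvtap) device node, or reuse a user-supplied fd, and
 * probe whether the kernel still accepts the TUN_F_UFO offload.
 */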
static bool virtio_net__tap_create(struct net_dev *ndev)
{
	int offload;
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');

	/* Did the user already give us an FD? */
	if (params->fd)
		ndev->tap_fd = params->fd;
	else {
		const char *tap_file = "/dev/net/tun";

		/* Did the user ask us to use macvtap? */
		if (macvtap)
			tap_file = params->tapif;

		ndev->tap_fd = open(tap_file, O_RDWR);
		if (ndev->tap_fd < 0) {
			pr_warning("Unable to open %s", tap_file);
			return false;
		}
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Failed to configure tap device. Are you root?");
		goto fail;
	}

	/*
	 * UFO support was removed from the kernel in commit
	 * fb652fdfe83710da0ca13448a41b7ed027d0a984
	 * (https://www.spinics.net/lists/netdev/msg443562.html).
	 * To keep supporting older kernels that predate this commit, we
	 * request TUN_F_UFO by default and use the result to probe whether
	 * the kernel still supports UFO.
	 */
	ndev->tap_ufo = true;
	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		/*
		 * Did this fail because the kernel removed UFO support?
		 * Retry TUNSETOFFLOAD without TUN_F_UFO.
		 */
		offload &= ~TUN_F_UFO;
		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
			pr_warning("Failed to set offloads on tap device (TUNSETOFFLOAD)");
			goto fail;
		}
		ndev->tap_ufo = false;
	}

	return true;

fail:
	/* Only close the fd if we opened it ourselves. */
	if (!params->fd && ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return (u8 *)&ndev->config;
}

static size_t get_config_size(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return sizeof(ndev->config);
}

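/*
 * Advertise the features we can service; when vhost is in use, the set is
 * additionally masked by what the vhost-net kernel module supports.
 */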
static u64 get_host_features(struct kvm *kvm, void *dev)
{
	u64 features;
	struct net_dev *ndev = dev;

	features = 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| 1UL << VIRTIO_F_ANY_LAYOUT;

	if (ndev->queue_pairs > 1)
		features |= 1UL << VIRTIO_NET_F_MQ;

	/*
	 * The UFO feature for host and guest can only be enabled when the
	 * kernel has TAP UFO support.
	 */
	if (ndev->tap_ufo)
		features |= (1UL << VIRTIO_NET_F_HOST_UFO
				| 1UL << VIRTIO_NET_F_GUEST_UFO);

	if (ndev->vhost_fd) {
		u64 vhost_features;

		if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
			die_perror("VHOST_GET_FEATURES failed");

		features &= vhost_features;
	}

	return features;
}

static void virtio_net_start(struct net_dev *ndev)
{
	/* VHOST_NET_F_VIRTIO_NET_HDR clashes with VIRTIO_F_ANY_LAYOUT! */
	u64 features = ndev->vdev.features & ~(1UL << VHOST_NET_F_VIRTIO_NET_HDR);

	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("TAP device initialization failed");

		if (ndev->vhost_fd && virtio_vhost_set_features(ndev->vhost_fd,
								features))
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = virtio_net_hdr_len(ndev);
		uip_init(&ndev->info);
	}
}

static void virtio_net_stop(struct net_dev *ndev)
{
	/* Undo whatever start() did */
	if (ndev->mode == NET_MODE_TAP)
		virtio_net__tap_exit(ndev);
	else
		uip_exit(&ndev->info);
}

static void virtio_net_update_endian(struct net_dev *ndev)
{
	struct virtio_net_config *conf = &ndev->config;

	conf->status = virtio_host_to_guest_u16(ndev->vdev.endian,
						VIRTIO_NET_S_LINK_UP);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(ndev->vdev.endian,
							     ndev->queue_pairs);

	/* Let TAP know about the vnet header endianness */
	if (ndev->mode == NET_MODE_TAP &&
	    ndev->vdev.endian != VIRTIO_ENDIAN_HOST) {
		int enable_val = 1, disable_val = 0;
		int enable_req, disable_req;

		if (ndev->vdev.endian == VIRTIO_ENDIAN_LE) {
			enable_req = TUNSETVNETLE;
			disable_req = TUNSETVNETBE;
		} else {
			enable_req = TUNSETVNETBE;
			disable_req = TUNSETVNETLE;
		}

		ioctl(ndev->tap_fd, disable_req, &disable_val);
		if (ioctl(ndev->tap_fd, enable_req, &enable_val) < 0)
			pr_err("Failed to set vnet header endianness (TUNSETVNETLE/BE)");
	}
}

static void notify_status(struct kvm *kvm, void *dev, u32 status)
{
	struct net_dev *ndev = dev;

	if (status & VIRTIO__STATUS_CONFIG)
		virtio_net_update_endian(ndev);

	if (status & VIRTIO__STATUS_START)
		virtio_net_start(dev);
	else if (status & VIRTIO__STATUS_STOP)
		virtio_net_stop(dev);
}

static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

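/*
 * Queue layout: even indices are RX queues, odd indices are TX queues, and
 * the last queue (queue_pairs * 2) is the control queue. Without vhost,
 * each RX/TX queue gets its own servicing thread; with vhost, the kernel
 * handles the data path and only the control queue needs a thread.
 */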
static int init_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct vhost_vring_file file = { .index = vq };
	struct net_dev_queue *net_queue;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	int r;

	compat__remove_message(compat_id);

	net_queue = &ndev->queues[vq];
	net_queue->id = vq;
	net_queue->ndev = ndev;
	queue = &net_queue->vq;
	virtio_init_device_vq(kvm, &ndev->vdev, queue, VIRTIO_NET_QUEUE_SIZE);

	mutex_init(&net_queue->lock);
	pthread_cond_init(&net_queue->cond, NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread,
			       net_queue);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&net_queue->thread, NULL,
				       virtio_net_tx_thread, net_queue);
		else
			pthread_create(&net_queue->thread, NULL,
				       virtio_net_rx_thread, net_queue);

		return 0;
	}

	/* vhost services this queue: wire up the ring and the tap backend. */
	virtio_vhost_set_vring(kvm, ndev->vhost_fd, vq, queue);

	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r < 0)
		die_perror("VHOST_NET_SET_BACKEND failed");

	return 0;
}

static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;
	struct net_dev_queue *queue = &ndev->queues[vq];

	virtio_vhost_reset_vring(kvm, ndev->vhost_fd, vq, &queue->vq);

	/*
	 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but
	 * we can't restart it at the moment.
	 */
	if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) {
		pr_warning("Cannot reset VHOST queue");
		ioctl(ndev->vhost_fd, VHOST_RESET_OWNER);
		return;
	}

	/*
	 * Threads are waiting on cancellation points (readv or
	 * pthread_cond_wait) and should stop gracefully.
	 */
	pthread_cancel(queue->thread);
	pthread_join(queue->thread, NULL);
}

static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct net_dev_queue *queue = &ndev->queues[vq];

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	virtio_vhost_set_vring_irqfd(kvm, gsi, &queue->vq);
}

static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	virtio_vhost_set_vring_kick(kvm, ndev->vhost_fd, vq, efd);
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return &ndev->queues[vq].vq;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static unsigned int get_vq_count(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ndev->queue_pairs * 2 + 1;
}

static struct virtio_ops net_dev_virtio_ops = {
	.get_config		= get_config,
	.get_config_size	= get_config_size,
	.get_host_features	= get_host_features,
	.get_vq_count		= get_vq_count,
	.init_vq		= init_vq,
	.exit_vq		= exit_vq,
	.get_vq			= get_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
	.notify_status		= notify_status,
};

static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	if (ndev->queue_pairs > 1) {
		pr_warning("multiqueue is not supported with vhost yet");
		return;
	}

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed to open /dev/vhost-net");

	virtio_vhost_init(kvm, ndev->vhost_fd);

	ndev->vdev.use_vhost = true;
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       mac, mac + 1, mac + 2, mac + 3, mac + 4, mac + 5);
}

static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			 const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "downscript") == 0) {
		p->downscript = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

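/*
 * Parse a network option string of the form "key=val,key=val,..."
 * (e.g. "mode=tap,tapif=tap0,vhost=1") and append the result to
 * kvm->cfg.net_params.
 */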
int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.downscript	= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

static int virtio_net__init_one(struct virtio_net_params *params)
{
	enum virtio_trans trans = params->kvm->cfg.virtio_transport;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	int i, r;

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	list_add_tail(&ndev->list, &ndevs);

	ops = malloc(sizeof(*ops));
	if (ops == NULL) {
		r = -ENOMEM;
		goto err_free_ndev;
	}

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));

	for (i = 0; i < 6; i++) {
		ndev->config.mac[i] = params->guest_mac[i];
		ndev->info.guest_mac.addr[i] = params->guest_mac[i];
		ndev->info.host_mac.addr[i] = params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
		if (!virtio_net__tap_create(ndev))
			die_perror("You have requested a TAP device, but creating one failed");
	} else {
		ndev->info.host_ip = ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr = 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	r = virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
			PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
	if (r < 0) {
		free(ops);
		goto err_free_ndev;
	}

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;

err_free_ndev:
	/* Don't leak a half-initialized device on the global list. */
	list_del(&ndev->list);
	free(ndev);
	return r;
}

int virtio_net__init(struct kvm *kvm)
{
	int i, r;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		r = virtio_net__init_one(&kvm->cfg.net_params[i]);
		if (r < 0)
			goto cleanup;
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		r = virtio_net__init_one(&net_params);
		if (r < 0)
			goto cleanup;
	}

	return 0;

cleanup:
	virtio_net__exit(kvm);
	return r;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	struct virtio_net_params *params;
	struct net_dev *ndev;
	struct list_head *ptr, *n;

	list_for_each_safe(ptr, n, &ndevs) {
		ndev = list_entry(ptr, struct net_dev, list);
		params = ndev->params;
		/* Clean up any tap device that was attached to a bridge */
		if (ndev->mode == NET_MODE_TAP &&
		    strcmp(params->downscript, "none"))
			virtio_net_exec_script(params->downscript, ndev->tap_name);
		virtio_net_stop(ndev);

		list_del(&ndev->list);
		virtio_exit(kvm, &ndev->vdev);
		free(ndev);
	}

	return 0;
}
virtio_dev_exit(virtio_net__exit);