xref: /kvmtool/virtio/net.c (revision 49bada437b7f24abebd98a14b2ed9a06dde8827e)
131638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
2b5ee1ea7SAsias He #include "kvm/virtio-net.h"
34f56d42cSAsias He #include "kvm/virtio.h"
44f56d42cSAsias He #include "kvm/mutex.h"
54f56d42cSAsias He #include "kvm/util.h"
64f56d42cSAsias He #include "kvm/kvm.h"
72449f6e3SSasha Levin #include "kvm/irq.h"
8b5ee1ea7SAsias He #include "kvm/uip.h"
9cb83de6fSSasha Levin #include "kvm/guest_compat.h"
107668c3a6SSasha Levin #include "kvm/iovec.h"
114f56d42cSAsias He 
129ed67cdcSSasha Levin #include <linux/vhost.h>
134f56d42cSAsias He #include <linux/virtio_net.h>
144f56d42cSAsias He #include <linux/if_tun.h>
1578a2a3e2SSasha Levin #include <linux/types.h>
16c229370aSIngo Molnar 
17c229370aSIngo Molnar #include <arpa/inet.h>
184f56d42cSAsias He #include <net/if.h>
19c229370aSIngo Molnar 
20c229370aSIngo Molnar #include <unistd.h>
214f56d42cSAsias He #include <fcntl.h>
22c229370aSIngo Molnar 
23cb7202c1SSasha Levin #include <sys/socket.h>
24c229370aSIngo Molnar #include <sys/ioctl.h>
25c229370aSIngo Molnar #include <sys/types.h>
2673b7d038SAmos Kong #include <sys/wait.h>
279ed67cdcSSasha Levin #include <sys/eventfd.h>
284f56d42cSAsias He 
/* Ring size passed to vring_init() for every queue; also sizes the worker threads' iovec arrays. */
294059ad8bSAsias He #define VIRTIO_NET_QUEUE_SIZE		256
/* Upper bound used to size net_dev::queues (2 * this + 1 slots). */
306585ffb7SSasha Levin #define VIRTIO_NET_NUM_QUEUES		8
314f56d42cSAsias He 
32b4fdde6dSAsias He struct net_dev;
33b4fdde6dSAsias He 
34b4fdde6dSAsias He struct net_dev_operations {
35b4fdde6dSAsias He 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
36b4fdde6dSAsias He 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
37b4fdde6dSAsias He };
38b4fdde6dSAsias He 
39ad96e867SJean-Philippe Brucker struct net_dev_queue {
40ad96e867SJean-Philippe Brucker 	int				id;
41ad96e867SJean-Philippe Brucker 	struct net_dev			*ndev;
42ad96e867SJean-Philippe Brucker 	struct virt_queue		vq;
43ad96e867SJean-Philippe Brucker 	pthread_t			thread;
44ad96e867SJean-Philippe Brucker 	struct mutex			lock;
45ad96e867SJean-Philippe Brucker 	pthread_cond_t			cond;
46*49bada43SJean-Philippe Brucker 	int				gsi;
47*49bada43SJean-Philippe Brucker 	int				irqfd;
48ad96e867SJean-Philippe Brucker };
49ad96e867SJean-Philippe Brucker 
508626798bSAsias He struct net_dev {
51d3476f7dSSasha Levin 	struct mutex			mutex;
5202eca50cSAsias He 	struct virtio_device		vdev;
539a6d73f1SSasha Levin 	struct list_head		list;
544f56d42cSAsias He 
55ad96e867SJean-Philippe Brucker 	struct net_dev_queue		queues[VIRTIO_NET_NUM_QUEUES * 2 + 1];
56c229370aSIngo Molnar 	struct virtio_net_config	config;
57ad96e867SJean-Philippe Brucker 	u32				features, queue_pairs;
58c4aa7c02SPekka Enberg 
599ed67cdcSSasha Levin 	int				vhost_fd;
604f56d42cSAsias He 	int				tap_fd;
614f56d42cSAsias He 	char				tap_name[IFNAMSIZ];
6259ee54ebSWei Chen 	bool				tap_ufo;
63bb1a32f1SAsias He 
64bb1a32f1SAsias He 	int				mode;
65bb1a32f1SAsias He 
66b5ee1ea7SAsias He 	struct uip_info			info;
67b4fdde6dSAsias He 	struct net_dev_operations	*ops;
689a6d73f1SSasha Levin 	struct kvm			*kvm;
698c0ae74dSSasha Levin 
708c0ae74dSSasha Levin 	struct virtio_net_params	*params;
714f56d42cSAsias He };
724f56d42cSAsias He 
/* Presumably the list of all net_dev instances (via net_dev::list); not used in this part of the file - TODO confirm. */
739a6d73f1SSasha Levin static LIST_HEAD(ndevs);
/* Identifier passed to compat__remove_message() from init_vq(); -1 until assigned elsewhere. */
74312c62d1SSasha Levin static int compat_id = -1;
754f56d42cSAsias He 
/* Payload capacity of the RX bounce buffer (the virtio-net header is added on top). */
767668c3a6SSasha Levin #define MAX_PACKET_SIZE 65550
777668c3a6SSasha Levin 
787668c3a6SSasha Levin static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
797668c3a6SSasha Levin {
807668c3a6SSasha Levin 	return ndev->features & (1 << feature);
817668c3a6SSasha Levin }
827668c3a6SSasha Levin 
838ed60bbeSMarc Zyngier static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
848ed60bbeSMarc Zyngier {
858ed60bbeSMarc Zyngier 	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
868ed60bbeSMarc Zyngier 	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
878ed60bbeSMarc Zyngier 	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
888ed60bbeSMarc Zyngier 	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
898ed60bbeSMarc Zyngier }
908ed60bbeSMarc Zyngier 
913fea89a9SWill Deacon static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
928ed60bbeSMarc Zyngier {
933fea89a9SWill Deacon 	hdr->hdr_len		= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len);
943fea89a9SWill Deacon 	hdr->gso_size		= virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size);
953fea89a9SWill Deacon 	hdr->csum_start		= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start);
963fea89a9SWill Deacon 	hdr->csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset);
978ed60bbeSMarc Zyngier }
988ed60bbeSMarc Zyngier 
99c4aa7c02SPekka Enberg static void *virtio_net_rx_thread(void *p)
1004f56d42cSAsias He {
1014f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
102ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = p;
103ad96e867SJean-Philippe Brucker 	struct virt_queue *vq = &queue->vq;
104ad96e867SJean-Philippe Brucker 	struct net_dev *ndev = queue->ndev;
10543835ac9SSasha Levin 	struct kvm *kvm;
1063fdf659dSSasha Levin 	u16 out, in;
1073fdf659dSSasha Levin 	u16 head;
1086d6cc14bSWill Deacon 	int len, copied;
1094f56d42cSAsias He 
110a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-rx");
111a4d8c55eSSasha Levin 
1129a6d73f1SSasha Levin 	kvm = ndev->kvm;
113c4aa7c02SPekka Enberg 	while (1) {
114ad96e867SJean-Philippe Brucker 		mutex_lock(&queue->lock);
115c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
116ad96e867SJean-Philippe Brucker 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
117ad96e867SJean-Philippe Brucker 		mutex_unlock(&queue->lock);
1184f56d42cSAsias He 
1194f56d42cSAsias He 		while (virt_queue__available(vq)) {
1207668c3a6SSasha Levin 			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
1217668c3a6SSasha Levin 			struct iovec dummy_iov = {
1227668c3a6SSasha Levin 				.iov_base = buffer,
1237668c3a6SSasha Levin 				.iov_len  = sizeof(buffer),
1247668c3a6SSasha Levin 			};
1257668c3a6SSasha Levin 			struct virtio_net_hdr_mrg_rxbuf *hdr;
1263fea89a9SWill Deacon 			u16 num_buffers;
1277f5ffaf5SAsias He 
1287668c3a6SSasha Levin 			len = ndev->ops->rx(&dummy_iov, 1, ndev);
1296d6cc14bSWill Deacon 			if (len < 0) {
1306d6cc14bSWill Deacon 				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
131ad96e867SJean-Philippe Brucker 						__func__, queue->id, len);
1326d6cc14bSWill Deacon 				goto out_err;
1336d6cc14bSWill Deacon 			}
1346d6cc14bSWill Deacon 
1353fea89a9SWill Deacon 			copied = num_buffers = 0;
1367668c3a6SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1378ed60bbeSMarc Zyngier 			hdr = iov[0].iov_base;
1387668c3a6SSasha Levin 			while (copied < len) {
1396d6cc14bSWill Deacon 				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));
1407668c3a6SSasha Levin 
141e2493047SAsias He 				memcpy_toiovec(iov, buffer + copied, iovsize);
1427668c3a6SSasha Levin 				copied += iovsize;
1433fea89a9SWill Deacon 				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
1447668c3a6SSasha Levin 				if (copied == len)
1457668c3a6SSasha Levin 					break;
1467668c3a6SSasha Levin 				while (!virt_queue__available(vq))
1477668c3a6SSasha Levin 					sleep(0);
1487668c3a6SSasha Levin 				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1497668c3a6SSasha Levin 			}
1503fea89a9SWill Deacon 
1513fea89a9SWill Deacon 			virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
1523fea89a9SWill Deacon 			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
1533fea89a9SWill Deacon 				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);
1543fea89a9SWill Deacon 
1553fea89a9SWill Deacon 			virt_queue__used_idx_advance(vq, num_buffers);
1563fea89a9SWill Deacon 
157c4aa7c02SPekka Enberg 			/* We should interrupt guest right now, otherwise latency is huge. */
1586585ffb7SSasha Levin 			if (virtio_queue__should_signal(vq))
159ad96e867SJean-Philippe Brucker 				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
1604f56d42cSAsias He 		}
161c4aa7c02SPekka Enberg 	}
162c4aa7c02SPekka Enberg 
1636d6cc14bSWill Deacon out_err:
164c4aa7c02SPekka Enberg 	pthread_exit(NULL);
165c4aa7c02SPekka Enberg 	return NULL;
166c4aa7c02SPekka Enberg 
167c4aa7c02SPekka Enberg }
168c4aa7c02SPekka Enberg 
169c4aa7c02SPekka Enberg static void *virtio_net_tx_thread(void *p)
1704f56d42cSAsias He {
1714f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
172ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = p;
173ad96e867SJean-Philippe Brucker 	struct virt_queue *vq = &queue->vq;
174ad96e867SJean-Philippe Brucker 	struct net_dev *ndev = queue->ndev;
17543835ac9SSasha Levin 	struct kvm *kvm;
1763fdf659dSSasha Levin 	u16 out, in;
1773fdf659dSSasha Levin 	u16 head;
1784f56d42cSAsias He 	int len;
1794f56d42cSAsias He 
180a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-tx");
181a4d8c55eSSasha Levin 
1829a6d73f1SSasha Levin 	kvm = ndev->kvm;
183c4aa7c02SPekka Enberg 
184c4aa7c02SPekka Enberg 	while (1) {
185ad96e867SJean-Philippe Brucker 		mutex_lock(&queue->lock);
186c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
187ad96e867SJean-Philippe Brucker 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
188ad96e867SJean-Philippe Brucker 		mutex_unlock(&queue->lock);
1894f56d42cSAsias He 
1904f56d42cSAsias He 		while (virt_queue__available(vq)) {
1918ed60bbeSMarc Zyngier 			struct virtio_net_hdr *hdr;
19243835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1938ed60bbeSMarc Zyngier 			hdr = iov[0].iov_base;
1948ed60bbeSMarc Zyngier 			virtio_net_fix_tx_hdr(hdr, ndev);
1959a6d73f1SSasha Levin 			len = ndev->ops->tx(iov, out, ndev);
1966d6cc14bSWill Deacon 			if (len < 0) {
1976d6cc14bSWill Deacon 				pr_warning("%s: tx on vq %u failed (%d)\n",
198ad96e867SJean-Philippe Brucker 						__func__, queue->id, errno);
1996d6cc14bSWill Deacon 				goto out_err;
2006d6cc14bSWill Deacon 			}
2016d6cc14bSWill Deacon 
2024f56d42cSAsias He 			virt_queue__set_used_elem(vq, head, len);
2034f56d42cSAsias He 		}
2044f56d42cSAsias He 
2056585ffb7SSasha Levin 		if (virtio_queue__should_signal(vq))
206ad96e867SJean-Philippe Brucker 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
2074f56d42cSAsias He 	}
2084f56d42cSAsias He 
2096d6cc14bSWill Deacon out_err:
210c4aa7c02SPekka Enberg 	pthread_exit(NULL);
211c4aa7c02SPekka Enberg 	return NULL;
212c4aa7c02SPekka Enberg }
213407475bfSPekka Enberg 
2146585ffb7SSasha Levin static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
2156585ffb7SSasha Levin {
2166585ffb7SSasha Levin 	/* Not much to do here */
2176585ffb7SSasha Levin 	return VIRTIO_NET_OK;
2186585ffb7SSasha Levin }
2196585ffb7SSasha Levin 
2206585ffb7SSasha Levin static void *virtio_net_ctrl_thread(void *p)
221abd4a801SSasha Levin {
222abd4a801SSasha Levin 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
223ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = p;
224ad96e867SJean-Philippe Brucker 	struct virt_queue *vq = &queue->vq;
225ad96e867SJean-Philippe Brucker 	struct net_dev *ndev = queue->ndev;
226abd4a801SSasha Levin 	u16 out, in, head;
2276585ffb7SSasha Levin 	struct kvm *kvm = ndev->kvm;
228abd4a801SSasha Levin 	struct virtio_net_ctrl_hdr *ctrl;
229abd4a801SSasha Levin 	virtio_net_ctrl_ack *ack;
230abd4a801SSasha Levin 
231edb4a8a0SSuzuki K. Poulose 	kvm__set_thread_name("virtio-net-ctrl");
232edb4a8a0SSuzuki K. Poulose 
2336585ffb7SSasha Levin 	while (1) {
234ad96e867SJean-Philippe Brucker 		mutex_lock(&queue->lock);
2356585ffb7SSasha Levin 		if (!virt_queue__available(vq))
236ad96e867SJean-Philippe Brucker 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
237ad96e867SJean-Philippe Brucker 		mutex_unlock(&queue->lock);
2386585ffb7SSasha Levin 
2396585ffb7SSasha Levin 		while (virt_queue__available(vq)) {
240ad96e867SJean-Philippe Brucker 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
241abd4a801SSasha Levin 			ctrl = iov[0].iov_base;
242abd4a801SSasha Levin 			ack = iov[out].iov_base;
243abd4a801SSasha Levin 
244abd4a801SSasha Levin 			switch (ctrl->class) {
2456585ffb7SSasha Levin 			case VIRTIO_NET_CTRL_MQ:
2466585ffb7SSasha Levin 				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
2476585ffb7SSasha Levin 				break;
248abd4a801SSasha Levin 			default:
249abd4a801SSasha Levin 				*ack = VIRTIO_NET_ERR;
250abd4a801SSasha Levin 				break;
251abd4a801SSasha Levin 			}
252ad96e867SJean-Philippe Brucker 			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
2536585ffb7SSasha Levin 		}
254abd4a801SSasha Levin 
255ad96e867SJean-Philippe Brucker 		if (virtio_queue__should_signal(vq))
256ad96e867SJean-Philippe Brucker 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
2576585ffb7SSasha Levin 	}
258abd4a801SSasha Levin 
2596585ffb7SSasha Levin 	pthread_exit(NULL);
260abd4a801SSasha Levin 
2616585ffb7SSasha Levin 	return NULL;
262abd4a801SSasha Levin }
263abd4a801SSasha Levin 
2649a6d73f1SSasha Levin static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
2654f56d42cSAsias He {
266ad96e867SJean-Philippe Brucker 	struct net_dev_queue *net_queue = &ndev->queues[queue];
267ad96e867SJean-Philippe Brucker 
26879052597SSasha Levin 	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
2699a6d73f1SSasha Levin 		pr_warning("Unknown queue index %u", queue);
2706585ffb7SSasha Levin 		return;
271c4aa7c02SPekka Enberg 	}
2726585ffb7SSasha Levin 
273ad96e867SJean-Philippe Brucker 	mutex_lock(&net_queue->lock);
274ad96e867SJean-Philippe Brucker 	pthread_cond_signal(&net_queue->cond);
275ad96e867SJean-Philippe Brucker 	mutex_unlock(&net_queue->lock);
2764f56d42cSAsias He }
2774f56d42cSAsias He 
278d2a7ddffSMarc Zyngier static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
279d2a7ddffSMarc Zyngier 				  const char *tapname)
280d2a7ddffSMarc Zyngier {
281d2a7ddffSMarc Zyngier 	int ret;
282d2a7ddffSMarc Zyngier 
283d2a7ddffSMarc Zyngier 	memset(ifr, 0, sizeof(*ifr));
284d2a7ddffSMarc Zyngier 	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
285d2a7ddffSMarc Zyngier 	if (tapname)
286d2a7ddffSMarc Zyngier 		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));
287d2a7ddffSMarc Zyngier 
288f83dc816SAndreas Herrmann 	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);
289d2a7ddffSMarc Zyngier 
290d2a7ddffSMarc Zyngier 	if (ret >= 0)
291d2a7ddffSMarc Zyngier 		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
292d2a7ddffSMarc Zyngier 	return ret;
293d2a7ddffSMarc Zyngier }
294d2a7ddffSMarc Zyngier 
/*
 * Run @script with @tap_name as its only argument and wait for it.
 *
 * Returns 0 when the script exits with status 0, and -1 when the child
 * cannot be created or waited for, when the script exits with a non-zero
 * status, or when it terminates abnormally (e.g. killed by a signal).
 */
static int virtio_net_exec_script(const char* script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = fork();
	if (pid < 0) {
		/* Without this, waitpid(-1, ...) would wait for any child. */
		pr_warning("Fail to fork for tap setup script %s", script);
		return -1;
	}

	if (pid == 0) {
		execl(script, script, tap_name, (char *)NULL);
		/* Only reached when exec failed. */
		_exit(1);
	}

	/* Retry when interrupted so that status is valid before it is read. */
	while (waitpid(pid, &status, 0) < 0) {
		if (errno == EINTR)
			continue;
		pr_warning("Fail to wait for tap setup script %s", script);
		return -1;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	return 0;
}
313eef27ae3SFan Du 
3148c0ae74dSSasha Levin static bool virtio_net__tap_init(struct net_dev *ndev)
3154f56d42cSAsias He {
316cb7202c1SSasha Levin 	int sock = socket(AF_INET, SOCK_STREAM, 0);
31759ee54ebSWei Chen 	int hdr_len;
318cb7202c1SSasha Levin 	struct sockaddr_in sin = {0};
319246c8347SAsias He 	struct ifreq ifr;
3208c0ae74dSSasha Levin 	const struct virtio_net_params *params = ndev->params;
3211dfc7c24SMarc Zyngier 	bool skipconf = !!params->tapif;
3224f56d42cSAsias He 
3237668c3a6SSasha Levin 	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
32448c6b47aSSasha Levin 			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
32548c6b47aSSasha Levin 			sizeof(struct virtio_net_hdr);
3269a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
3274542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
328246c8347SAsias He 
32973b7d038SAmos Kong 	if (strcmp(params->script, "none")) {
330eef27ae3SFan Du 		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
33173b7d038SAmos Kong 			goto fail;
3321dfc7c24SMarc Zyngier 	} else if (!skipconf) {
333cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
3349a6d73f1SSasha Levin 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
335bdfcfca6SSasha Levin 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
336cb7202c1SSasha Levin 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
337cb7202c1SSasha Levin 		ifr.ifr_addr.sa_family = AF_INET;
3383b02f580SSasha Levin 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
3394542f276SCyrill Gorcunov 			pr_warning("Could not set ip address on tap device");
3403b02f580SSasha Levin 			goto fail;
3413b02f580SSasha Levin 		}
34273b7d038SAmos Kong 	}
343cb7202c1SSasha Levin 
3441dfc7c24SMarc Zyngier 	if (!skipconf) {
345cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
3469a6d73f1SSasha Levin 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
347cb7202c1SSasha Levin 		ioctl(sock, SIOCGIFFLAGS, &ifr);
348cb7202c1SSasha Levin 		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
349cb7202c1SSasha Levin 		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
3504542f276SCyrill Gorcunov 			pr_warning("Could not bring tap device up");
3511dfc7c24SMarc Zyngier 	}
352cb7202c1SSasha Levin 
353cb7202c1SSasha Levin 	close(sock);
3543b02f580SSasha Levin 
3553b02f580SSasha Levin 	return 1;
3563b02f580SSasha Levin 
3573b02f580SSasha Levin fail:
3583b02f580SSasha Levin 	if (sock >= 0)
3593b02f580SSasha Levin 		close(sock);
3609a6d73f1SSasha Levin 	if (ndev->tap_fd >= 0)
3619a6d73f1SSasha Levin 		close(ndev->tap_fd);
3623b02f580SSasha Levin 
3633b02f580SSasha Levin 	return 0;
3644f56d42cSAsias He }
3654f56d42cSAsias He 
366*49bada43SJean-Philippe Brucker static void virtio_net__tap_exit(struct net_dev *ndev)
367*49bada43SJean-Philippe Brucker {
368*49bada43SJean-Philippe Brucker 	int sock;
369*49bada43SJean-Philippe Brucker 	struct ifreq ifr;
370*49bada43SJean-Philippe Brucker 
371*49bada43SJean-Philippe Brucker 	if (ndev->params->tapif)
372*49bada43SJean-Philippe Brucker 		return;
373*49bada43SJean-Philippe Brucker 
374*49bada43SJean-Philippe Brucker 	sock = socket(AF_INET, SOCK_STREAM, 0);
375*49bada43SJean-Philippe Brucker 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
376*49bada43SJean-Philippe Brucker 	ioctl(sock, SIOCGIFFLAGS, &ifr);
377*49bada43SJean-Philippe Brucker 	ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
378*49bada43SJean-Philippe Brucker 	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
379*49bada43SJean-Philippe Brucker 		pr_warning("Count not bring tap device down");
380*49bada43SJean-Philippe Brucker 	close(sock);
381*49bada43SJean-Philippe Brucker }
382*49bada43SJean-Philippe Brucker 
38359ee54ebSWei Chen static bool virtio_net__tap_create(struct net_dev *ndev)
38459ee54ebSWei Chen {
38559ee54ebSWei Chen 	int offload;
38659ee54ebSWei Chen 	struct ifreq ifr;
38759ee54ebSWei Chen 	const struct virtio_net_params *params = ndev->params;
38859ee54ebSWei Chen 	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');
38959ee54ebSWei Chen 
39059ee54ebSWei Chen 	/* Did the user already gave us the FD? */
39159ee54ebSWei Chen 	if (params->fd)
39259ee54ebSWei Chen 		ndev->tap_fd = params->fd;
39359ee54ebSWei Chen 	else {
39459ee54ebSWei Chen 		const char *tap_file = "/dev/net/tun";
39559ee54ebSWei Chen 
39659ee54ebSWei Chen 		/* Did the user ask us to use macvtap? */
39759ee54ebSWei Chen 		if (macvtap)
39859ee54ebSWei Chen 			tap_file = params->tapif;
39959ee54ebSWei Chen 
40059ee54ebSWei Chen 		ndev->tap_fd = open(tap_file, O_RDWR);
40159ee54ebSWei Chen 		if (ndev->tap_fd < 0) {
40259ee54ebSWei Chen 			pr_warning("Unable to open %s", tap_file);
40359ee54ebSWei Chen 			return 0;
40459ee54ebSWei Chen 		}
40559ee54ebSWei Chen 	}
40659ee54ebSWei Chen 
40759ee54ebSWei Chen 	if (!macvtap &&
40859ee54ebSWei Chen 	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
40959ee54ebSWei Chen 		pr_warning("Config tap device error. Are you root?");
41059ee54ebSWei Chen 		goto fail;
41159ee54ebSWei Chen 	}
41259ee54ebSWei Chen 
41359ee54ebSWei Chen 	/*
41459ee54ebSWei Chen 	 * The UFO support had been removed from kernel in commit:
41559ee54ebSWei Chen 	 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984
41659ee54ebSWei Chen 	 * https://www.spinics.net/lists/netdev/msg443562.html
41759ee54ebSWei Chen 	 * In oder to support the older kernels without this commit,
41859ee54ebSWei Chen 	 * we set the TUN_F_UFO to offload by default to test the status of
41959ee54ebSWei Chen 	 * UFO kernel support.
42059ee54ebSWei Chen 	 */
42159ee54ebSWei Chen 	ndev->tap_ufo = true;
42259ee54ebSWei Chen 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
42359ee54ebSWei Chen 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
42459ee54ebSWei Chen 		/*
42559ee54ebSWei Chen 		 * Is this failure caused by kernel remove the UFO support?
42659ee54ebSWei Chen 		 * Try TUNSETOFFLOAD without TUN_F_UFO.
42759ee54ebSWei Chen 		 */
42859ee54ebSWei Chen 		offload &= ~TUN_F_UFO;
42959ee54ebSWei Chen 		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
43059ee54ebSWei Chen 			pr_warning("Config tap device TUNSETOFFLOAD error");
43159ee54ebSWei Chen 			goto fail;
43259ee54ebSWei Chen 		}
43359ee54ebSWei Chen 		ndev->tap_ufo = false;
43459ee54ebSWei Chen 	}
43559ee54ebSWei Chen 
43659ee54ebSWei Chen 	return 1;
43759ee54ebSWei Chen 
43859ee54ebSWei Chen fail:
43959ee54ebSWei Chen 	if ((ndev->tap_fd >= 0) || (!params->fd) )
44059ee54ebSWei Chen 		close(ndev->tap_fd);
44159ee54ebSWei Chen 
44259ee54ebSWei Chen 	return 0;
44359ee54ebSWei Chen }
44459ee54ebSWei Chen 
445b4fdde6dSAsias He static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
446b4fdde6dSAsias He {
447b4fdde6dSAsias He 	return writev(ndev->tap_fd, iov, out);
448b4fdde6dSAsias He }
449b4fdde6dSAsias He 
450b4fdde6dSAsias He static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
451b4fdde6dSAsias He {
452b4fdde6dSAsias He 	return readv(ndev->tap_fd, iov, in);
453b4fdde6dSAsias He }
454b4fdde6dSAsias He 
455b4fdde6dSAsias He static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
456b4fdde6dSAsias He {
457b4fdde6dSAsias He 	return uip_tx(iov, out, &ndev->info);
458b4fdde6dSAsias He }
459b4fdde6dSAsias He 
460b4fdde6dSAsias He static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
461b4fdde6dSAsias He {
462b4fdde6dSAsias He 	return uip_rx(iov, in, &ndev->info);
463b4fdde6dSAsias He }
464b4fdde6dSAsias He 
465b4fdde6dSAsias He static struct net_dev_operations tap_ops = {
466b4fdde6dSAsias He 	.rx	= tap_ops_rx,
467b4fdde6dSAsias He 	.tx	= tap_ops_tx,
468b4fdde6dSAsias He };
469b4fdde6dSAsias He 
470b4fdde6dSAsias He static struct net_dev_operations uip_ops = {
471b4fdde6dSAsias He 	.rx	= uip_ops_rx,
472b4fdde6dSAsias He 	.tx	= uip_ops_tx,
473b4fdde6dSAsias He };
474b4fdde6dSAsias He 
475c5ae742bSSasha Levin static u8 *get_config(struct kvm *kvm, void *dev)
47678a2a3e2SSasha Levin {
47778a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
47878a2a3e2SSasha Levin 
479c5ae742bSSasha Levin 	return ((u8 *)(&ndev->config));
48078a2a3e2SSasha Levin }
48178a2a3e2SSasha Levin 
48278a2a3e2SSasha Levin static u32 get_host_features(struct kvm *kvm, void *dev)
48378a2a3e2SSasha Levin {
48459ee54ebSWei Chen 	u32 features;
48579052597SSasha Levin 	struct net_dev *ndev = dev;
48679052597SSasha Levin 
48759ee54ebSWei Chen 	features = 1UL << VIRTIO_NET_F_MAC
48878a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_CSUM
48978a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO4
49078a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO6
49178a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
49292c1f37bSSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
493754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_EVENT_IDX
494abd4a801SSasha Levin 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
4956585ffb7SSasha Levin 		| 1UL << VIRTIO_NET_F_CTRL_VQ
4967668c3a6SSasha Levin 		| 1UL << VIRTIO_NET_F_MRG_RXBUF
49779052597SSasha Levin 		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
49859ee54ebSWei Chen 
49959ee54ebSWei Chen 	/*
50059ee54ebSWei Chen 	 * The UFO feature for host and guest only can be enabled when the
50159ee54ebSWei Chen 	 * kernel has TAP UFO support.
50259ee54ebSWei Chen 	 */
50359ee54ebSWei Chen 	if (ndev->tap_ufo)
50459ee54ebSWei Chen 		features |= (1UL << VIRTIO_NET_F_HOST_UFO
50559ee54ebSWei Chen 				| 1UL << VIRTIO_NET_F_GUEST_UFO);
50659ee54ebSWei Chen 
50759ee54ebSWei Chen 	return features;
50878a2a3e2SSasha Levin }
50978a2a3e2SSasha Levin 
51015636d5eSYing-Shiuan Pan static int virtio_net__vhost_set_features(struct net_dev *ndev)
51115636d5eSYing-Shiuan Pan {
51215636d5eSYing-Shiuan Pan 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
51315636d5eSYing-Shiuan Pan 	u64 vhost_features;
51415636d5eSYing-Shiuan Pan 
51515636d5eSYing-Shiuan Pan 	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
51615636d5eSYing-Shiuan Pan 		die_perror("VHOST_GET_FEATURES failed");
51715636d5eSYing-Shiuan Pan 
51815636d5eSYing-Shiuan Pan 	/* make sure both side support mergable rx buffers */
51915636d5eSYing-Shiuan Pan 	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
52015636d5eSYing-Shiuan Pan 			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
52115636d5eSYing-Shiuan Pan 		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;
52215636d5eSYing-Shiuan Pan 
52315636d5eSYing-Shiuan Pan 	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
52415636d5eSYing-Shiuan Pan }
52515636d5eSYing-Shiuan Pan 
52678a2a3e2SSasha Levin static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
52778a2a3e2SSasha Levin {
52878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
5298ed60bbeSMarc Zyngier 	struct virtio_net_config *conf = &ndev->config;
53078a2a3e2SSasha Levin 
53178a2a3e2SSasha Levin 	ndev->features = features;
5328aa155c4SYing-Shiuan Pan 
5338ed60bbeSMarc Zyngier 	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
5348ed60bbeSMarc Zyngier 	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
5358ed60bbeSMarc Zyngier 							     conf->max_virtqueue_pairs);
53695242e44SJean-Philippe Brucker }
5378ed60bbeSMarc Zyngier 
53895242e44SJean-Philippe Brucker static void virtio_net_start(struct net_dev *ndev)
53995242e44SJean-Philippe Brucker {
5408aa155c4SYing-Shiuan Pan 	if (ndev->mode == NET_MODE_TAP) {
5418aa155c4SYing-Shiuan Pan 		if (!virtio_net__tap_init(ndev))
54259ee54ebSWei Chen 			die_perror("TAP device initialized failed because");
54359ee54ebSWei Chen 
54415636d5eSYing-Shiuan Pan 		if (ndev->vhost_fd &&
54515636d5eSYing-Shiuan Pan 				virtio_net__vhost_set_features(ndev) != 0)
54615636d5eSYing-Shiuan Pan 			die_perror("VHOST_SET_FEATURES failed");
5478aa155c4SYing-Shiuan Pan 	} else {
5488aa155c4SYing-Shiuan Pan 		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
5498aa155c4SYing-Shiuan Pan 						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
5508aa155c4SYing-Shiuan Pan 						sizeof(struct virtio_net_hdr);
5518aa155c4SYing-Shiuan Pan 		uip_init(&ndev->info);
5528aa155c4SYing-Shiuan Pan 	}
55378a2a3e2SSasha Levin }
55478a2a3e2SSasha Levin 
555*49bada43SJean-Philippe Brucker static void virtio_net_stop(struct net_dev *ndev)
556*49bada43SJean-Philippe Brucker {
557*49bada43SJean-Philippe Brucker 	/* Undo whatever start() did */
558*49bada43SJean-Philippe Brucker 	if (ndev->mode == NET_MODE_TAP)
559*49bada43SJean-Philippe Brucker 		virtio_net__tap_exit(ndev);
560*49bada43SJean-Philippe Brucker 	else
561*49bada43SJean-Philippe Brucker 		uip_exit(&ndev->info);
562*49bada43SJean-Philippe Brucker }
563*49bada43SJean-Philippe Brucker 
56495242e44SJean-Philippe Brucker static void notify_status(struct kvm *kvm, void *dev, u32 status)
56595242e44SJean-Philippe Brucker {
56695242e44SJean-Philippe Brucker 	if (status & VIRTIO__STATUS_START)
56795242e44SJean-Philippe Brucker 		virtio_net_start(dev);
568*49bada43SJean-Philippe Brucker 	else if (status & VIRTIO__STATUS_STOP)
569*49bada43SJean-Philippe Brucker 		virtio_net_stop(dev);
57095242e44SJean-Philippe Brucker }
57195242e44SJean-Philippe Brucker 
5729e56ec14SSasha Levin static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
5739e56ec14SSasha Levin {
5749e56ec14SSasha Levin 	return vq == (u32)(ndev->queue_pairs * 2);
5759e56ec14SSasha Levin }
5769e56ec14SSasha Levin 
577c59ba304SWill Deacon static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
578c59ba304SWill Deacon 		   u32 pfn)
57978a2a3e2SSasha Levin {
5809ed67cdcSSasha Levin 	struct vhost_vring_state state = { .index = vq };
581ad96e867SJean-Philippe Brucker 	struct net_dev_queue *net_queue;
5829ed67cdcSSasha Levin 	struct vhost_vring_addr addr;
58378a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
58478a2a3e2SSasha Levin 	struct virt_queue *queue;
58578a2a3e2SSasha Levin 	void *p;
5869ed67cdcSSasha Levin 	int r;
58778a2a3e2SSasha Levin 
588312c62d1SSasha Levin 	compat__remove_message(compat_id);
58978a2a3e2SSasha Levin 
590ad96e867SJean-Philippe Brucker 	net_queue	= &ndev->queues[vq];
591ad96e867SJean-Philippe Brucker 	net_queue->id	= vq;
592ad96e867SJean-Philippe Brucker 	net_queue->ndev	= ndev;
593ad96e867SJean-Philippe Brucker 	queue		= &net_queue->vq;
59478a2a3e2SSasha Levin 	queue->pfn	= pfn;
595e7e2950aSSasha Levin 	p		= virtio_get_vq(kvm, queue->pfn, page_size);
59678a2a3e2SSasha Levin 
597c59ba304SWill Deacon 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
5988ed60bbeSMarc Zyngier 	virtio_init_device_vq(&ndev->vdev, queue);
59978a2a3e2SSasha Levin 
600ad96e867SJean-Philippe Brucker 	mutex_init(&net_queue->lock);
601ad96e867SJean-Philippe Brucker 	pthread_cond_init(&net_queue->cond, NULL);
6029e56ec14SSasha Levin 	if (is_ctrl_vq(ndev, vq)) {
603ad96e867SJean-Philippe Brucker 		pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread,
604ad96e867SJean-Philippe Brucker 			       net_queue);
6059e56ec14SSasha Levin 
6069e56ec14SSasha Levin 		return 0;
6079e56ec14SSasha Levin 	} else if (ndev->vhost_fd == 0 ) {
6089e56ec14SSasha Levin 		if (vq & 1)
609ad96e867SJean-Philippe Brucker 			pthread_create(&net_queue->thread, NULL,
610ad96e867SJean-Philippe Brucker 				       virtio_net_tx_thread, net_queue);
6116585ffb7SSasha Levin 		else
612ad96e867SJean-Philippe Brucker 			pthread_create(&net_queue->thread, NULL,
613ad96e867SJean-Philippe Brucker 				       virtio_net_rx_thread, net_queue);
6146585ffb7SSasha Levin 
61578a2a3e2SSasha Levin 		return 0;
6166585ffb7SSasha Levin 	}
6179ed67cdcSSasha Levin 
6188ed60bbeSMarc Zyngier 	if (queue->endian != VIRTIO_ENDIAN_HOST)
619b960f243SAndre Przywara 		die_perror("VHOST requires the same endianness in guest and host");
6208ed60bbeSMarc Zyngier 
6219ed67cdcSSasha Levin 	state.num = queue->vring.num;
6229ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
6239ed67cdcSSasha Levin 	if (r < 0)
6249ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_NUM failed");
6259ed67cdcSSasha Levin 	state.num = 0;
6269ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
6279ed67cdcSSasha Levin 	if (r < 0)
6289ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_BASE failed");
6299ed67cdcSSasha Levin 
6309ed67cdcSSasha Levin 	addr = (struct vhost_vring_addr) {
6319ed67cdcSSasha Levin 		.index = vq,
6329ed67cdcSSasha Levin 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
6339ed67cdcSSasha Levin 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
6349ed67cdcSSasha Levin 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
6359ed67cdcSSasha Levin 	};
6369ed67cdcSSasha Levin 
6379ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
6389ed67cdcSSasha Levin 	if (r < 0)
6399ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_ADDR failed");
6409ed67cdcSSasha Levin 
6419ed67cdcSSasha Levin 	return 0;
6429ed67cdcSSasha Levin }
6439ed67cdcSSasha Levin 
644*49bada43SJean-Philippe Brucker static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
645*49bada43SJean-Philippe Brucker {
646*49bada43SJean-Philippe Brucker 	struct net_dev *ndev = dev;
647*49bada43SJean-Philippe Brucker 	struct net_dev_queue *queue = &ndev->queues[vq];
648*49bada43SJean-Philippe Brucker 
649*49bada43SJean-Philippe Brucker 	if (!is_ctrl_vq(ndev, vq) && queue->gsi) {
650*49bada43SJean-Philippe Brucker 		irq__del_irqfd(kvm, queue->gsi, queue->irqfd);
651*49bada43SJean-Philippe Brucker 		close(queue->irqfd);
652*49bada43SJean-Philippe Brucker 		queue->gsi = queue->irqfd = 0;
653*49bada43SJean-Philippe Brucker 	}
654*49bada43SJean-Philippe Brucker 
655*49bada43SJean-Philippe Brucker 	/*
656*49bada43SJean-Philippe Brucker 	 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but
657*49bada43SJean-Philippe Brucker 	 * we can't restart it at the moment.
658*49bada43SJean-Philippe Brucker 	 */
659*49bada43SJean-Philippe Brucker 	if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) {
660*49bada43SJean-Philippe Brucker 		pr_warning("Cannot reset VHOST queue");
661*49bada43SJean-Philippe Brucker 		ioctl(ndev->vhost_fd, VHOST_RESET_OWNER);
662*49bada43SJean-Philippe Brucker 		return;
663*49bada43SJean-Philippe Brucker 	}
664*49bada43SJean-Philippe Brucker 
665*49bada43SJean-Philippe Brucker 	/*
666*49bada43SJean-Philippe Brucker 	 * Threads are waiting on cancellation points (readv or
667*49bada43SJean-Philippe Brucker 	 * pthread_cond_wait) and should stop gracefully.
668*49bada43SJean-Philippe Brucker 	 */
669*49bada43SJean-Philippe Brucker 	pthread_cancel(queue->thread);
670*49bada43SJean-Philippe Brucker 	pthread_join(queue->thread, NULL);
671*49bada43SJean-Philippe Brucker }
672*49bada43SJean-Philippe Brucker 
6739ed67cdcSSasha Levin static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
6749ed67cdcSSasha Levin {
6759ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
676ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = &ndev->queues[vq];
6779ed67cdcSSasha Levin 	struct vhost_vring_file file;
6789ed67cdcSSasha Levin 	int r;
6799ed67cdcSSasha Levin 
6809ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
6819ed67cdcSSasha Levin 		return;
6829ed67cdcSSasha Levin 
6839ed67cdcSSasha Levin 	file = (struct vhost_vring_file) {
6849ed67cdcSSasha Levin 		.index	= vq,
685e59679d2SJean-Philippe Brucker 		.fd	= eventfd(0, 0),
6869ed67cdcSSasha Levin 	};
6879ed67cdcSSasha Levin 
688e59679d2SJean-Philippe Brucker 	r = irq__add_irqfd(kvm, gsi, file.fd, -1);
6899ed67cdcSSasha Levin 	if (r < 0)
6909ed67cdcSSasha Levin 		die_perror("KVM_IRQFD failed");
6919ed67cdcSSasha Levin 
692*49bada43SJean-Philippe Brucker 	queue->irqfd = file.fd;
693*49bada43SJean-Philippe Brucker 	queue->gsi = gsi;
694*49bada43SJean-Philippe Brucker 
6959ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
6969ed67cdcSSasha Levin 	if (r < 0)
6979ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_CALL failed");
6989ed67cdcSSasha Levin 	file.fd = ndev->tap_fd;
6999ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
7009ed67cdcSSasha Levin 	if (r != 0)
7019ed67cdcSSasha Levin 		die("VHOST_NET_SET_BACKEND failed %d", errno);
7029ed67cdcSSasha Levin 
7039ed67cdcSSasha Levin }
7049ed67cdcSSasha Levin 
7059ed67cdcSSasha Levin static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
7069ed67cdcSSasha Levin {
7079ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
7089ed67cdcSSasha Levin 	struct vhost_vring_file file = {
7099ed67cdcSSasha Levin 		.index	= vq,
7109ed67cdcSSasha Levin 		.fd	= efd,
7119ed67cdcSSasha Levin 	};
7129ed67cdcSSasha Levin 	int r;
7139ed67cdcSSasha Levin 
7149e56ec14SSasha Levin 	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
7159ed67cdcSSasha Levin 		return;
7169ed67cdcSSasha Levin 
7179ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
7189ed67cdcSSasha Levin 	if (r < 0)
7199ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_KICK failed");
72078a2a3e2SSasha Levin }
72178a2a3e2SSasha Levin 
72278a2a3e2SSasha Levin static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
72378a2a3e2SSasha Levin {
7249a6d73f1SSasha Levin 	struct net_dev *ndev = dev;
7259a6d73f1SSasha Levin 
7269a6d73f1SSasha Levin 	virtio_net_handle_callback(kvm, ndev, vq);
72778a2a3e2SSasha Levin 
72878a2a3e2SSasha Levin 	return 0;
72978a2a3e2SSasha Levin }
73078a2a3e2SSasha Levin 
73153fbb17bSJean-Philippe Brucker static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
73278a2a3e2SSasha Levin {
73378a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
73478a2a3e2SSasha Levin 
735ad96e867SJean-Philippe Brucker 	return &ndev->queues[vq].vq;
73678a2a3e2SSasha Levin }
73778a2a3e2SSasha Levin 
73878a2a3e2SSasha Levin static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
73978a2a3e2SSasha Levin {
7400f4f51a8SAsias He 	/* FIXME: dynamic */
74178a2a3e2SSasha Levin 	return VIRTIO_NET_QUEUE_SIZE;
74278a2a3e2SSasha Levin }
74378a2a3e2SSasha Levin 
7440f4f51a8SAsias He static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
7450f4f51a8SAsias He {
7460f4f51a8SAsias He 	/* FIXME: dynamic */
7470f4f51a8SAsias He 	return size;
7480f4f51a8SAsias He }
7490f4f51a8SAsias He 
750b98ac591SJean-Philippe Brucker static int get_vq_count(struct kvm *kvm, void *dev)
751b98ac591SJean-Philippe Brucker {
752b98ac591SJean-Philippe Brucker 	struct net_dev *ndev = dev;
753b98ac591SJean-Philippe Brucker 
754b98ac591SJean-Philippe Brucker 	return ndev->queue_pairs * 2 + 1;
755b98ac591SJean-Philippe Brucker }
756b98ac591SJean-Philippe Brucker 
75715542babSAndre Przywara static struct virtio_ops net_dev_virtio_ops = {
7581c47ce69SSasha Levin 	.get_config		= get_config,
7591c47ce69SSasha Levin 	.get_host_features	= get_host_features,
7601c47ce69SSasha Levin 	.set_guest_features	= set_guest_features,
761b98ac591SJean-Philippe Brucker 	.get_vq_count		= get_vq_count,
7621c47ce69SSasha Levin 	.init_vq		= init_vq,
763*49bada43SJean-Philippe Brucker 	.exit_vq		= exit_vq,
76453fbb17bSJean-Philippe Brucker 	.get_vq			= get_vq,
7651c47ce69SSasha Levin 	.get_size_vq		= get_size_vq,
7660f4f51a8SAsias He 	.set_size_vq		= set_size_vq,
7670f4f51a8SAsias He 	.notify_vq		= notify_vq,
7689ed67cdcSSasha Levin 	.notify_vq_gsi		= notify_vq_gsi,
7699ed67cdcSSasha Levin 	.notify_vq_eventfd	= notify_vq_eventfd,
77095242e44SJean-Philippe Brucker 	.notify_status		= notify_status,
7711c47ce69SSasha Levin };
7721c47ce69SSasha Levin 
7739ed67cdcSSasha Levin static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
7749ed67cdcSSasha Levin {
7757f9733c2SAndreas Herrmann 	struct kvm_mem_bank *bank;
7769ed67cdcSSasha Levin 	struct vhost_memory *mem;
7777f9733c2SAndreas Herrmann 	int r, i;
7789ed67cdcSSasha Levin 
7799ed67cdcSSasha Levin 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
7809ed67cdcSSasha Levin 	if (ndev->vhost_fd < 0)
7819ed67cdcSSasha Levin 		die_perror("Failed openning vhost-net device");
7829ed67cdcSSasha Levin 
7837f9733c2SAndreas Herrmann 	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
7849ed67cdcSSasha Levin 	if (mem == NULL)
7859ed67cdcSSasha Levin 		die("Failed allocating memory for vhost memory map");
7869ed67cdcSSasha Levin 
7877f9733c2SAndreas Herrmann 	i = 0;
7887f9733c2SAndreas Herrmann 	list_for_each_entry(bank, &kvm->mem_banks, list) {
7897f9733c2SAndreas Herrmann 		mem->regions[i] = (struct vhost_memory_region) {
7907f9733c2SAndreas Herrmann 			.guest_phys_addr = bank->guest_phys_addr,
7917f9733c2SAndreas Herrmann 			.memory_size	 = bank->size,
7927f9733c2SAndreas Herrmann 			.userspace_addr	 = (unsigned long)bank->host_addr,
7939ed67cdcSSasha Levin 		};
7947f9733c2SAndreas Herrmann 		i++;
7957f9733c2SAndreas Herrmann 	}
7967f9733c2SAndreas Herrmann 	mem->nregions = i;
7979ed67cdcSSasha Levin 
7989ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
7999ed67cdcSSasha Levin 	if (r != 0)
8009ed67cdcSSasha Levin 		die_perror("VHOST_SET_OWNER failed");
8019ed67cdcSSasha Levin 
8029ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
8039ed67cdcSSasha Levin 	if (r != 0)
8049ed67cdcSSasha Levin 		die_perror("VHOST_SET_MEM_TABLE failed");
805627d6874SAsias He 
806627d6874SAsias He 	ndev->vdev.use_vhost = true;
807627d6874SAsias He 
8089ed67cdcSSasha Levin 	free(mem);
8099ed67cdcSSasha Levin }
8109ed67cdcSSasha Levin 
/*
 * Parse a colon-separated hexadecimal MAC address ("aa:bb:cc:dd:ee:ff") from
 * @str into the six bytes at @mac.
 *
 * The sscanf() result is not checked: if @str holds fewer than six fields,
 * only the leading bytes that were parsed are written and the rest of @mac is
 * left as it was.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char *octet = (unsigned char *)mac;

	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &octet[0], &octet[1], &octet[2],
	       &octet[3], &octet[4], &octet[5]);
}
8164346fd8fSSasha Levin static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
8174346fd8fSSasha Levin 			const char *param, const char *val)
8185f225124SSasha Levin {
8195f225124SSasha Levin 	if (strcmp(param, "guest_mac") == 0) {
8205f225124SSasha Levin 		str_to_mac(val, p->guest_mac);
8215f225124SSasha Levin 	} else if (strcmp(param, "mode") == 0) {
8225f225124SSasha Levin 		if (!strncmp(val, "user", 4)) {
8235f225124SSasha Levin 			int i;
8245f225124SSasha Levin 
8255f225124SSasha Levin 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
8265f225124SSasha Levin 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
8275f225124SSasha Levin 					die("Only one usermode network device allowed at a time");
8285f225124SSasha Levin 			p->mode = NET_MODE_USER;
8295f225124SSasha Levin 		} else if (!strncmp(val, "tap", 3)) {
8305f225124SSasha Levin 			p->mode = NET_MODE_TAP;
8315f225124SSasha Levin 		} else if (!strncmp(val, "none", 4)) {
8325f225124SSasha Levin 			kvm->cfg.no_net = 1;
8335f225124SSasha Levin 			return -1;
8345f225124SSasha Levin 		} else
8355f225124SSasha Levin 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
8365f225124SSasha Levin 	} else if (strcmp(param, "script") == 0) {
8375f225124SSasha Levin 		p->script = strdup(val);
838eef27ae3SFan Du 	} else if (strcmp(param, "downscript") == 0) {
839eef27ae3SFan Du 		p->downscript = strdup(val);
8405f225124SSasha Levin 	} else if (strcmp(param, "guest_ip") == 0) {
8415f225124SSasha Levin 		p->guest_ip = strdup(val);
8425f225124SSasha Levin 	} else if (strcmp(param, "host_ip") == 0) {
8435f225124SSasha Levin 		p->host_ip = strdup(val);
8445f225124SSasha Levin 	} else if (strcmp(param, "trans") == 0) {
8455f225124SSasha Levin 		p->trans = strdup(val);
8461dfc7c24SMarc Zyngier 	} else if (strcmp(param, "tapif") == 0) {
8471dfc7c24SMarc Zyngier 		p->tapif = strdup(val);
8485f225124SSasha Levin 	} else if (strcmp(param, "vhost") == 0) {
8495f225124SSasha Levin 		p->vhost = atoi(val);
8505f225124SSasha Levin 	} else if (strcmp(param, "fd") == 0) {
8515f225124SSasha Levin 		p->fd = atoi(val);
85279052597SSasha Levin 	} else if (strcmp(param, "mq") == 0) {
85379052597SSasha Levin 		p->mq = atoi(val);
8545f225124SSasha Levin 	} else
8555f225124SSasha Levin 		die("Unknown network parameter %s", param);
8565f225124SSasha Levin 
8575f225124SSasha Levin 	return 0;
8585f225124SSasha Levin }
8595f225124SSasha Levin 
8605f225124SSasha Levin int netdev_parser(const struct option *opt, const char *arg, int unset)
8615f225124SSasha Levin {
8625f225124SSasha Levin 	struct virtio_net_params p;
8635f225124SSasha Levin 	char *buf = NULL, *cmd = NULL, *cur = NULL;
8645f225124SSasha Levin 	bool on_cmd = true;
8655f225124SSasha Levin 	struct kvm *kvm = opt->ptr;
8665f225124SSasha Levin 
8675f225124SSasha Levin 	if (arg) {
8685f225124SSasha Levin 		buf = strdup(arg);
8695f225124SSasha Levin 		if (buf == NULL)
8705f225124SSasha Levin 			die("Failed allocating new net buffer");
8715f225124SSasha Levin 		cur = strtok(buf, ",=");
8725f225124SSasha Levin 	}
8735f225124SSasha Levin 
8745f225124SSasha Levin 	p = (struct virtio_net_params) {
8755f225124SSasha Levin 		.guest_ip	= DEFAULT_GUEST_ADDR,
8765f225124SSasha Levin 		.host_ip	= DEFAULT_HOST_ADDR,
8775f225124SSasha Levin 		.script		= DEFAULT_SCRIPT,
878eef27ae3SFan Du 		.downscript	= DEFAULT_SCRIPT,
8795f225124SSasha Levin 		.mode		= NET_MODE_TAP,
8805f225124SSasha Levin 	};
8815f225124SSasha Levin 
8825f225124SSasha Levin 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
8835f225124SSasha Levin 	p.guest_mac[5] += kvm->cfg.num_net_devices;
8845f225124SSasha Levin 
8855f225124SSasha Levin 	while (cur) {
8865f225124SSasha Levin 		if (on_cmd) {
8875f225124SSasha Levin 			cmd = cur;
8885f225124SSasha Levin 		} else {
8894346fd8fSSasha Levin 			if (set_net_param(kvm, &p, cmd, cur) < 0)
8905f225124SSasha Levin 				goto done;
8915f225124SSasha Levin 		}
8925f225124SSasha Levin 		on_cmd = !on_cmd;
8935f225124SSasha Levin 
8945f225124SSasha Levin 		cur = strtok(NULL, ",=");
8955f225124SSasha Levin 	};
8965f225124SSasha Levin 
8975f225124SSasha Levin 	kvm->cfg.num_net_devices++;
8985f225124SSasha Levin 
8995f225124SSasha Levin 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
9005f225124SSasha Levin 	if (kvm->cfg.net_params == NULL)
9015f225124SSasha Levin 		die("Failed adding new network device");
9025f225124SSasha Levin 
9035f225124SSasha Levin 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
9045f225124SSasha Levin 
9055f225124SSasha Levin done:
9065f225124SSasha Levin 	free(buf);
9075f225124SSasha Levin 	return 0;
9085f225124SSasha Levin }
9095f225124SSasha Levin 
9105f225124SSasha Levin static int virtio_net__init_one(struct virtio_net_params *params)
9114f56d42cSAsias He {
91276a4aac6SWill Deacon 	int i, err;
9139a6d73f1SSasha Levin 	struct net_dev *ndev;
91476a4aac6SWill Deacon 	struct virtio_ops *ops;
915dc7a55d6SSuzuki K. Poulose 	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);
9169a6d73f1SSasha Levin 
9179a6d73f1SSasha Levin 	ndev = calloc(1, sizeof(struct net_dev));
9189a6d73f1SSasha Levin 	if (ndev == NULL)
9195f225124SSasha Levin 		return -ENOMEM;
9209a6d73f1SSasha Levin 
92176a4aac6SWill Deacon 	ops = malloc(sizeof(*ops));
92276a4aac6SWill Deacon 	if (ops == NULL) {
92376a4aac6SWill Deacon 		err = -ENOMEM;
92476a4aac6SWill Deacon 		goto err_free_ndev;
92576a4aac6SWill Deacon 	}
92676a4aac6SWill Deacon 
9279a6d73f1SSasha Levin 	list_add_tail(&ndev->list, &ndevs);
9289a6d73f1SSasha Levin 
9299a6d73f1SSasha Levin 	ndev->kvm = params->kvm;
9308c0ae74dSSasha Levin 	ndev->params = params;
9319a6d73f1SSasha Levin 
9329a6d73f1SSasha Levin 	mutex_init(&ndev->mutex);
93379052597SSasha Levin 	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
9349a6d73f1SSasha Levin 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
93579052597SSasha Levin 	if (ndev->queue_pairs > 1)
93679052597SSasha Levin 		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;
93779052597SSasha Levin 
9380c54698eSAsias He 	for (i = 0 ; i < 6 ; i++) {
9399a6d73f1SSasha Levin 		ndev->config.mac[i]		= params->guest_mac[i];
9409a6d73f1SSasha Levin 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
9419a6d73f1SSasha Levin 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
9420c54698eSAsias He 	}
943f715177dSAsias He 
9449a6d73f1SSasha Levin 	ndev->mode = params->mode;
9459a6d73f1SSasha Levin 	if (ndev->mode == NET_MODE_TAP) {
9469a6d73f1SSasha Levin 		ndev->ops = &tap_ops;
94759ee54ebSWei Chen 		if (!virtio_net__tap_create(ndev))
94859ee54ebSWei Chen 			die_perror("You have requested a TAP device, but creation of one has failed because");
949b4fdde6dSAsias He 	} else {
9509a6d73f1SSasha Levin 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
9519a6d73f1SSasha Levin 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
9529a6d73f1SSasha Levin 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
9539a6d73f1SSasha Levin 		ndev->info.buf_nr		= 20,
9549a6d73f1SSasha Levin 		ndev->ops = &uip_ops;
9555f3aaf22SMarc Zyngier 		uip_static_init(&ndev->info);
956b4fdde6dSAsias He 	}
957b5ee1ea7SAsias He 
95876a4aac6SWill Deacon 	*ops = net_dev_virtio_ops;
959dc7a55d6SSuzuki K. Poulose 
960dc7a55d6SSuzuki K. Poulose 	if (params->trans) {
961dc7a55d6SSuzuki K. Poulose 		if (strcmp(params->trans, "mmio") == 0)
962dc7a55d6SSuzuki K. Poulose 			trans = VIRTIO_MMIO;
963dc7a55d6SSuzuki K. Poulose 		else if (strcmp(params->trans, "pci") == 0)
964dc7a55d6SSuzuki K. Poulose 			trans = VIRTIO_PCI;
96569205aa1SAsias He 		else
966dc7a55d6SSuzuki K. Poulose 			pr_warning("virtio-net: Unknown transport method : %s, "
967dc7a55d6SSuzuki K. Poulose 				   "falling back to %s.", params->trans,
968dc7a55d6SSuzuki K. Poulose 				   virtio_trans_name(trans));
969dc7a55d6SSuzuki K. Poulose 	}
970dc7a55d6SSuzuki K. Poulose 
971dc7a55d6SSuzuki K. Poulose 	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
97276a4aac6SWill Deacon 		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
97327ab67f5SSasha Levin 
9749ed67cdcSSasha Levin 	if (params->vhost)
9759ed67cdcSSasha Levin 		virtio_net__vhost_init(params->kvm, ndev);
976cb83de6fSSasha Levin 
977d278197dSAsias He 	if (compat_id == -1)
97852f34d2cSAsias He 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
9795f225124SSasha Levin 
9805f225124SSasha Levin 	return 0;
98176a4aac6SWill Deacon 
98276a4aac6SWill Deacon err_free_ndev:
98376a4aac6SWill Deacon 	free(ndev);
98476a4aac6SWill Deacon 	return err;
9855f225124SSasha Levin }
9865f225124SSasha Levin 
9875f225124SSasha Levin int virtio_net__init(struct kvm *kvm)
9885f225124SSasha Levin {
9895f225124SSasha Levin 	int i;
9905f225124SSasha Levin 
9915f225124SSasha Levin 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
9925f225124SSasha Levin 		kvm->cfg.net_params[i].kvm = kvm;
9935f225124SSasha Levin 		virtio_net__init_one(&kvm->cfg.net_params[i]);
9945f225124SSasha Levin 	}
9955f225124SSasha Levin 
9965f225124SSasha Levin 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
9978c0ae74dSSasha Levin 		static struct virtio_net_params net_params;
9985f225124SSasha Levin 
9995f225124SSasha Levin 		net_params = (struct virtio_net_params) {
10005f225124SSasha Levin 			.guest_ip	= kvm->cfg.guest_ip,
10015f225124SSasha Levin 			.host_ip	= kvm->cfg.host_ip,
10025f225124SSasha Levin 			.kvm		= kvm,
10035f225124SSasha Levin 			.script		= kvm->cfg.script,
10045f225124SSasha Levin 			.mode		= NET_MODE_USER,
10055f225124SSasha Levin 		};
10065f225124SSasha Levin 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
10075f225124SSasha Levin 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
10085f225124SSasha Levin 
10095f225124SSasha Levin 		virtio_net__init_one(&net_params);
10105f225124SSasha Levin 	}
10115f225124SSasha Levin 
10125f225124SSasha Levin 	return 0;
10135f225124SSasha Levin }
101449a8afd1SSasha Levin virtio_dev_init(virtio_net__init);
10155f225124SSasha Levin 
10165f225124SSasha Levin int virtio_net__exit(struct kvm *kvm)
10175f225124SSasha Levin {
1018eef27ae3SFan Du 	struct virtio_net_params *params;
1019eef27ae3SFan Du 	struct net_dev *ndev;
1020eef27ae3SFan Du 	struct list_head *ptr;
1021eef27ae3SFan Du 
1022eef27ae3SFan Du 	list_for_each(ptr, &ndevs) {
1023eef27ae3SFan Du 		ndev = list_entry(ptr, struct net_dev, list);
1024eef27ae3SFan Du 		params = ndev->params;
1025eef27ae3SFan Du 		/* Cleanup any tap device which attached to bridge */
1026eef27ae3SFan Du 		if (ndev->mode == NET_MODE_TAP &&
1027eef27ae3SFan Du 		    strcmp(params->downscript, "none"))
1028eef27ae3SFan Du 			virtio_net_exec_script(params->downscript, ndev->tap_name);
1029eef27ae3SFan Du 	}
10305f225124SSasha Levin 	return 0;
10314f56d42cSAsias He }
103249a8afd1SSasha Levin virtio_dev_exit(virtio_net__exit);
1033