xref: /kvmtool/virtio/net.c (revision 8b27bcff44fd4adaa1466e3198e4222816eefa06)
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/iovec.h"
#include "kvm/strbuf.h"

#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <errno.h>
#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
294f56d42cSAsias He 
/* Ring size handed to virtio_init_device_vq(); also sizes the per-thread iovec arrays. */
#define VIRTIO_NET_QUEUE_SIZE		256
/* Sizes net_dev::queues: two queues per unit plus one extra slot (the control queue). */
#define VIRTIO_NET_NUM_QUEUES		8
324f56d42cSAsias He 
33b4fdde6dSAsias He struct net_dev;
34b4fdde6dSAsias He 
35b4fdde6dSAsias He struct net_dev_operations {
36b4fdde6dSAsias He 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
37b4fdde6dSAsias He 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
38b4fdde6dSAsias He };
39b4fdde6dSAsias He 
40ad96e867SJean-Philippe Brucker struct net_dev_queue {
41ad96e867SJean-Philippe Brucker 	int				id;
42ad96e867SJean-Philippe Brucker 	struct net_dev			*ndev;
43ad96e867SJean-Philippe Brucker 	struct virt_queue		vq;
44ad96e867SJean-Philippe Brucker 	pthread_t			thread;
45ad96e867SJean-Philippe Brucker 	struct mutex			lock;
46ad96e867SJean-Philippe Brucker 	pthread_cond_t			cond;
4749bada43SJean-Philippe Brucker 	int				gsi;
4849bada43SJean-Philippe Brucker 	int				irqfd;
49ad96e867SJean-Philippe Brucker };
50ad96e867SJean-Philippe Brucker 
518626798bSAsias He struct net_dev {
52d3476f7dSSasha Levin 	struct mutex			mutex;
5302eca50cSAsias He 	struct virtio_device		vdev;
549a6d73f1SSasha Levin 	struct list_head		list;
554f56d42cSAsias He 
56ad96e867SJean-Philippe Brucker 	struct net_dev_queue		queues[VIRTIO_NET_NUM_QUEUES * 2 + 1];
57c229370aSIngo Molnar 	struct virtio_net_config	config;
58902a8ecbSJean-Philippe Brucker 	u32				queue_pairs;
59c4aa7c02SPekka Enberg 
609ed67cdcSSasha Levin 	int				vhost_fd;
614f56d42cSAsias He 	int				tap_fd;
624f56d42cSAsias He 	char				tap_name[IFNAMSIZ];
6359ee54ebSWei Chen 	bool				tap_ufo;
64bb1a32f1SAsias He 
65bb1a32f1SAsias He 	int				mode;
66bb1a32f1SAsias He 
67b5ee1ea7SAsias He 	struct uip_info			info;
68b4fdde6dSAsias He 	struct net_dev_operations	*ops;
699a6d73f1SSasha Levin 	struct kvm			*kvm;
708c0ae74dSSasha Levin 
718c0ae74dSSasha Levin 	struct virtio_net_params	*params;
724f56d42cSAsias He };
734f56d42cSAsias He 
/* List head for net devices (presumably linked through net_dev::list). */
static LIST_HEAD(ndevs);
/* Compat message handle; passed to compat__remove_message() once a queue is set up. */
static int compat_id = -1;

/* Payload capacity of the RX bounce buffer (the vnet header is added on top). */
#define MAX_PACKET_SIZE 65550
787668c3a6SSasha Levin 
797668c3a6SSasha Levin static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
807668c3a6SSasha Levin {
81902a8ecbSJean-Philippe Brucker 	return ndev->vdev.features & (1 << feature);
827668c3a6SSasha Levin }
837668c3a6SSasha Levin 
84c4aa7c02SPekka Enberg static void *virtio_net_rx_thread(void *p)
854f56d42cSAsias He {
864f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
87ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = p;
88ad96e867SJean-Philippe Brucker 	struct virt_queue *vq = &queue->vq;
89ad96e867SJean-Philippe Brucker 	struct net_dev *ndev = queue->ndev;
9043835ac9SSasha Levin 	struct kvm *kvm;
913fdf659dSSasha Levin 	u16 out, in;
923fdf659dSSasha Levin 	u16 head;
936d6cc14bSWill Deacon 	int len, copied;
944f56d42cSAsias He 
95a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-rx");
96a4d8c55eSSasha Levin 
979a6d73f1SSasha Levin 	kvm = ndev->kvm;
98c4aa7c02SPekka Enberg 	while (1) {
99ad96e867SJean-Philippe Brucker 		mutex_lock(&queue->lock);
100c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
101ad96e867SJean-Philippe Brucker 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
102ad96e867SJean-Philippe Brucker 		mutex_unlock(&queue->lock);
1034f56d42cSAsias He 
1044f56d42cSAsias He 		while (virt_queue__available(vq)) {
1057668c3a6SSasha Levin 			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
1067668c3a6SSasha Levin 			struct iovec dummy_iov = {
1077668c3a6SSasha Levin 				.iov_base = buffer,
1087668c3a6SSasha Levin 				.iov_len  = sizeof(buffer),
1097668c3a6SSasha Levin 			};
1107668c3a6SSasha Levin 			struct virtio_net_hdr_mrg_rxbuf *hdr;
1113fea89a9SWill Deacon 			u16 num_buffers;
1127f5ffaf5SAsias He 
1137668c3a6SSasha Levin 			len = ndev->ops->rx(&dummy_iov, 1, ndev);
1146d6cc14bSWill Deacon 			if (len < 0) {
1156d6cc14bSWill Deacon 				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
116ad96e867SJean-Philippe Brucker 						__func__, queue->id, len);
1176d6cc14bSWill Deacon 				goto out_err;
1186d6cc14bSWill Deacon 			}
1196d6cc14bSWill Deacon 
1203fea89a9SWill Deacon 			copied = num_buffers = 0;
1217668c3a6SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1228ed60bbeSMarc Zyngier 			hdr = iov[0].iov_base;
1237668c3a6SSasha Levin 			while (copied < len) {
1246d6cc14bSWill Deacon 				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));
1257668c3a6SSasha Levin 
126e2493047SAsias He 				memcpy_toiovec(iov, buffer + copied, iovsize);
1277668c3a6SSasha Levin 				copied += iovsize;
1283fea89a9SWill Deacon 				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
1297668c3a6SSasha Levin 				if (copied == len)
1307668c3a6SSasha Levin 					break;
1317668c3a6SSasha Levin 				while (!virt_queue__available(vq))
1327668c3a6SSasha Levin 					sleep(0);
1337668c3a6SSasha Levin 				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1347668c3a6SSasha Levin 			}
1353fea89a9SWill Deacon 
1363fea89a9SWill Deacon 			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
1373fea89a9SWill Deacon 				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);
1383fea89a9SWill Deacon 
1393fea89a9SWill Deacon 			virt_queue__used_idx_advance(vq, num_buffers);
1403fea89a9SWill Deacon 
141c4aa7c02SPekka Enberg 			/* We should interrupt guest right now, otherwise latency is huge. */
1426585ffb7SSasha Levin 			if (virtio_queue__should_signal(vq))
143ad96e867SJean-Philippe Brucker 				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
1444f56d42cSAsias He 		}
145c4aa7c02SPekka Enberg 	}
146c4aa7c02SPekka Enberg 
1476d6cc14bSWill Deacon out_err:
148c4aa7c02SPekka Enberg 	pthread_exit(NULL);
149c4aa7c02SPekka Enberg 	return NULL;
150c4aa7c02SPekka Enberg 
151c4aa7c02SPekka Enberg }
152c4aa7c02SPekka Enberg 
153c4aa7c02SPekka Enberg static void *virtio_net_tx_thread(void *p)
1544f56d42cSAsias He {
1554f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
156ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = p;
157ad96e867SJean-Philippe Brucker 	struct virt_queue *vq = &queue->vq;
158ad96e867SJean-Philippe Brucker 	struct net_dev *ndev = queue->ndev;
15943835ac9SSasha Levin 	struct kvm *kvm;
1603fdf659dSSasha Levin 	u16 out, in;
1613fdf659dSSasha Levin 	u16 head;
1624f56d42cSAsias He 	int len;
1634f56d42cSAsias He 
164a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-tx");
165a4d8c55eSSasha Levin 
1669a6d73f1SSasha Levin 	kvm = ndev->kvm;
167c4aa7c02SPekka Enberg 
168c4aa7c02SPekka Enberg 	while (1) {
169ad96e867SJean-Philippe Brucker 		mutex_lock(&queue->lock);
170c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
171ad96e867SJean-Philippe Brucker 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
172ad96e867SJean-Philippe Brucker 		mutex_unlock(&queue->lock);
1734f56d42cSAsias He 
1744f56d42cSAsias He 		while (virt_queue__available(vq)) {
17543835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1769a6d73f1SSasha Levin 			len = ndev->ops->tx(iov, out, ndev);
1776d6cc14bSWill Deacon 			if (len < 0) {
1786d6cc14bSWill Deacon 				pr_warning("%s: tx on vq %u failed (%d)\n",
179ad96e867SJean-Philippe Brucker 						__func__, queue->id, errno);
1806d6cc14bSWill Deacon 				goto out_err;
1816d6cc14bSWill Deacon 			}
1826d6cc14bSWill Deacon 
1834f56d42cSAsias He 			virt_queue__set_used_elem(vq, head, len);
1844f56d42cSAsias He 		}
1854f56d42cSAsias He 
1866585ffb7SSasha Levin 		if (virtio_queue__should_signal(vq))
187ad96e867SJean-Philippe Brucker 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
1884f56d42cSAsias He 	}
1894f56d42cSAsias He 
1906d6cc14bSWill Deacon out_err:
191c4aa7c02SPekka Enberg 	pthread_exit(NULL);
192c4aa7c02SPekka Enberg 	return NULL;
193c4aa7c02SPekka Enberg }
194407475bfSPekka Enberg 
1956585ffb7SSasha Levin static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
1966585ffb7SSasha Levin {
1976585ffb7SSasha Levin 	/* Not much to do here */
1986585ffb7SSasha Levin 	return VIRTIO_NET_OK;
1996585ffb7SSasha Levin }
2006585ffb7SSasha Levin 
2016585ffb7SSasha Levin static void *virtio_net_ctrl_thread(void *p)
202abd4a801SSasha Levin {
203abd4a801SSasha Levin 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
204ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = p;
205ad96e867SJean-Philippe Brucker 	struct virt_queue *vq = &queue->vq;
206ad96e867SJean-Philippe Brucker 	struct net_dev *ndev = queue->ndev;
207abd4a801SSasha Levin 	u16 out, in, head;
2086585ffb7SSasha Levin 	struct kvm *kvm = ndev->kvm;
209abd4a801SSasha Levin 	struct virtio_net_ctrl_hdr *ctrl;
210abd4a801SSasha Levin 	virtio_net_ctrl_ack *ack;
211abd4a801SSasha Levin 
212edb4a8a0SSuzuki K. Poulose 	kvm__set_thread_name("virtio-net-ctrl");
213edb4a8a0SSuzuki K. Poulose 
2146585ffb7SSasha Levin 	while (1) {
215ad96e867SJean-Philippe Brucker 		mutex_lock(&queue->lock);
2166585ffb7SSasha Levin 		if (!virt_queue__available(vq))
217ad96e867SJean-Philippe Brucker 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
218ad96e867SJean-Philippe Brucker 		mutex_unlock(&queue->lock);
2196585ffb7SSasha Levin 
2206585ffb7SSasha Levin 		while (virt_queue__available(vq)) {
221ad96e867SJean-Philippe Brucker 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
222abd4a801SSasha Levin 			ctrl = iov[0].iov_base;
223abd4a801SSasha Levin 			ack = iov[out].iov_base;
224abd4a801SSasha Levin 
225abd4a801SSasha Levin 			switch (ctrl->class) {
2266585ffb7SSasha Levin 			case VIRTIO_NET_CTRL_MQ:
2276585ffb7SSasha Levin 				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
2286585ffb7SSasha Levin 				break;
229abd4a801SSasha Levin 			default:
230abd4a801SSasha Levin 				*ack = VIRTIO_NET_ERR;
231abd4a801SSasha Levin 				break;
232abd4a801SSasha Levin 			}
233ad96e867SJean-Philippe Brucker 			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
2346585ffb7SSasha Levin 		}
235abd4a801SSasha Levin 
236ad96e867SJean-Philippe Brucker 		if (virtio_queue__should_signal(vq))
237ad96e867SJean-Philippe Brucker 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
2386585ffb7SSasha Levin 	}
239abd4a801SSasha Levin 
2406585ffb7SSasha Levin 	pthread_exit(NULL);
241abd4a801SSasha Levin 
2426585ffb7SSasha Levin 	return NULL;
243abd4a801SSasha Levin }
244abd4a801SSasha Levin 
2459a6d73f1SSasha Levin static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
2464f56d42cSAsias He {
247ad96e867SJean-Philippe Brucker 	struct net_dev_queue *net_queue = &ndev->queues[queue];
248ad96e867SJean-Philippe Brucker 
24979052597SSasha Levin 	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
2509a6d73f1SSasha Levin 		pr_warning("Unknown queue index %u", queue);
2516585ffb7SSasha Levin 		return;
252c4aa7c02SPekka Enberg 	}
2536585ffb7SSasha Levin 
254ad96e867SJean-Philippe Brucker 	mutex_lock(&net_queue->lock);
255ad96e867SJean-Philippe Brucker 	pthread_cond_signal(&net_queue->cond);
256ad96e867SJean-Philippe Brucker 	mutex_unlock(&net_queue->lock);
2574f56d42cSAsias He }
2584f56d42cSAsias He 
259d2a7ddffSMarc Zyngier static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
260d2a7ddffSMarc Zyngier 				  const char *tapname)
261d2a7ddffSMarc Zyngier {
262d2a7ddffSMarc Zyngier 	int ret;
263d2a7ddffSMarc Zyngier 
264d2a7ddffSMarc Zyngier 	memset(ifr, 0, sizeof(*ifr));
265d2a7ddffSMarc Zyngier 	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
266d2a7ddffSMarc Zyngier 	if (tapname)
26705755b29SAndre Przywara 		strlcpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));
268d2a7ddffSMarc Zyngier 
269f83dc816SAndreas Herrmann 	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);
270d2a7ddffSMarc Zyngier 
271d2a7ddffSMarc Zyngier 	if (ret >= 0)
27205755b29SAndre Przywara 		strlcpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
273d2a7ddffSMarc Zyngier 	return ret;
274d2a7ddffSMarc Zyngier }
275d2a7ddffSMarc Zyngier 
/*
 * Run @script with @tap_name as its only argument and wait for it.
 *
 * Returns 0 when the script ran and exited with status 0. Returns -1 (after
 * logging a warning) when the child could not be created or waited for,
 * when it was terminated abnormally, or when it exited non-zero.
 */
static int virtio_net_exec_script(const char* script, const char *tap_name)
{
	pid_t pid, waited;
	int status;

	pid = fork();
	if (pid < 0) {
		/* Never fall through to waitpid(-1, ...): that would reap any child. */
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	if (pid == 0) {
		execl(script, script, tap_name, (char *)NULL);
		/* Only reached when exec failed. */
		_exit(1);
	}

	/* Retry when interrupted by a signal so status is always valid below. */
	do {
		waited = waitpid(pid, &status, 0);
	} while (waited < 0 && errno == EINTR);

	if (waited < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	return 0;
}
294eef27ae3SFan Du 
2958c0ae74dSSasha Levin static bool virtio_net__tap_init(struct net_dev *ndev)
2964f56d42cSAsias He {
297cb7202c1SSasha Levin 	int sock = socket(AF_INET, SOCK_STREAM, 0);
29859ee54ebSWei Chen 	int hdr_len;
299cb7202c1SSasha Levin 	struct sockaddr_in sin = {0};
300246c8347SAsias He 	struct ifreq ifr;
3018c0ae74dSSasha Levin 	const struct virtio_net_params *params = ndev->params;
3021dfc7c24SMarc Zyngier 	bool skipconf = !!params->tapif;
3034f56d42cSAsias He 
3047668c3a6SSasha Levin 	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
30548c6b47aSSasha Levin 			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
30648c6b47aSSasha Levin 			sizeof(struct virtio_net_hdr);
3079a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
3084542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
309246c8347SAsias He 
31073b7d038SAmos Kong 	if (strcmp(params->script, "none")) {
311eef27ae3SFan Du 		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
31273b7d038SAmos Kong 			goto fail;
3131dfc7c24SMarc Zyngier 	} else if (!skipconf) {
314cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
31516509081SAnisse Astier 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
316bdfcfca6SSasha Levin 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
317cb7202c1SSasha Levin 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
318cb7202c1SSasha Levin 		ifr.ifr_addr.sa_family = AF_INET;
3193b02f580SSasha Levin 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
3204542f276SCyrill Gorcunov 			pr_warning("Could not set ip address on tap device");
3213b02f580SSasha Levin 			goto fail;
3223b02f580SSasha Levin 		}
32373b7d038SAmos Kong 	}
324cb7202c1SSasha Levin 
3251dfc7c24SMarc Zyngier 	if (!skipconf) {
326cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
32716509081SAnisse Astier 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
328cb7202c1SSasha Levin 		ioctl(sock, SIOCGIFFLAGS, &ifr);
329cb7202c1SSasha Levin 		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
330cb7202c1SSasha Levin 		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
3314542f276SCyrill Gorcunov 			pr_warning("Could not bring tap device up");
3321dfc7c24SMarc Zyngier 	}
333cb7202c1SSasha Levin 
334cb7202c1SSasha Levin 	close(sock);
3353b02f580SSasha Levin 
3363b02f580SSasha Levin 	return 1;
3373b02f580SSasha Levin 
3383b02f580SSasha Levin fail:
3393b02f580SSasha Levin 	if (sock >= 0)
3403b02f580SSasha Levin 		close(sock);
3419a6d73f1SSasha Levin 	if (ndev->tap_fd >= 0)
3429a6d73f1SSasha Levin 		close(ndev->tap_fd);
3433b02f580SSasha Levin 
3443b02f580SSasha Levin 	return 0;
3454f56d42cSAsias He }
3464f56d42cSAsias He 
34749bada43SJean-Philippe Brucker static void virtio_net__tap_exit(struct net_dev *ndev)
34849bada43SJean-Philippe Brucker {
34949bada43SJean-Philippe Brucker 	int sock;
35049bada43SJean-Philippe Brucker 	struct ifreq ifr;
35149bada43SJean-Philippe Brucker 
35249bada43SJean-Philippe Brucker 	if (ndev->params->tapif)
35349bada43SJean-Philippe Brucker 		return;
35449bada43SJean-Philippe Brucker 
35549bada43SJean-Philippe Brucker 	sock = socket(AF_INET, SOCK_STREAM, 0);
35616509081SAnisse Astier 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
35749bada43SJean-Philippe Brucker 	ioctl(sock, SIOCGIFFLAGS, &ifr);
35849bada43SJean-Philippe Brucker 	ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
35949bada43SJean-Philippe Brucker 	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
36049bada43SJean-Philippe Brucker 		pr_warning("Count not bring tap device down");
36149bada43SJean-Philippe Brucker 	close(sock);
36249bada43SJean-Philippe Brucker }
36349bada43SJean-Philippe Brucker 
36459ee54ebSWei Chen static bool virtio_net__tap_create(struct net_dev *ndev)
36559ee54ebSWei Chen {
36659ee54ebSWei Chen 	int offload;
36759ee54ebSWei Chen 	struct ifreq ifr;
36859ee54ebSWei Chen 	const struct virtio_net_params *params = ndev->params;
36959ee54ebSWei Chen 	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');
37059ee54ebSWei Chen 
37159ee54ebSWei Chen 	/* Did the user already gave us the FD? */
37259ee54ebSWei Chen 	if (params->fd)
37359ee54ebSWei Chen 		ndev->tap_fd = params->fd;
37459ee54ebSWei Chen 	else {
37559ee54ebSWei Chen 		const char *tap_file = "/dev/net/tun";
37659ee54ebSWei Chen 
37759ee54ebSWei Chen 		/* Did the user ask us to use macvtap? */
37859ee54ebSWei Chen 		if (macvtap)
37959ee54ebSWei Chen 			tap_file = params->tapif;
38059ee54ebSWei Chen 
38159ee54ebSWei Chen 		ndev->tap_fd = open(tap_file, O_RDWR);
38259ee54ebSWei Chen 		if (ndev->tap_fd < 0) {
38359ee54ebSWei Chen 			pr_warning("Unable to open %s", tap_file);
38459ee54ebSWei Chen 			return 0;
38559ee54ebSWei Chen 		}
38659ee54ebSWei Chen 	}
38759ee54ebSWei Chen 
38859ee54ebSWei Chen 	if (!macvtap &&
38959ee54ebSWei Chen 	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
39059ee54ebSWei Chen 		pr_warning("Config tap device error. Are you root?");
39159ee54ebSWei Chen 		goto fail;
39259ee54ebSWei Chen 	}
39359ee54ebSWei Chen 
39459ee54ebSWei Chen 	/*
39559ee54ebSWei Chen 	 * The UFO support had been removed from kernel in commit:
39659ee54ebSWei Chen 	 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984
39759ee54ebSWei Chen 	 * https://www.spinics.net/lists/netdev/msg443562.html
39859ee54ebSWei Chen 	 * In oder to support the older kernels without this commit,
39959ee54ebSWei Chen 	 * we set the TUN_F_UFO to offload by default to test the status of
40059ee54ebSWei Chen 	 * UFO kernel support.
40159ee54ebSWei Chen 	 */
40259ee54ebSWei Chen 	ndev->tap_ufo = true;
40359ee54ebSWei Chen 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
40459ee54ebSWei Chen 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
40559ee54ebSWei Chen 		/*
40659ee54ebSWei Chen 		 * Is this failure caused by kernel remove the UFO support?
40759ee54ebSWei Chen 		 * Try TUNSETOFFLOAD without TUN_F_UFO.
40859ee54ebSWei Chen 		 */
40959ee54ebSWei Chen 		offload &= ~TUN_F_UFO;
41059ee54ebSWei Chen 		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
41159ee54ebSWei Chen 			pr_warning("Config tap device TUNSETOFFLOAD error");
41259ee54ebSWei Chen 			goto fail;
41359ee54ebSWei Chen 		}
41459ee54ebSWei Chen 		ndev->tap_ufo = false;
41559ee54ebSWei Chen 	}
41659ee54ebSWei Chen 
41759ee54ebSWei Chen 	return 1;
41859ee54ebSWei Chen 
41959ee54ebSWei Chen fail:
42059ee54ebSWei Chen 	if ((ndev->tap_fd >= 0) || (!params->fd) )
42159ee54ebSWei Chen 		close(ndev->tap_fd);
42259ee54ebSWei Chen 
42359ee54ebSWei Chen 	return 0;
42459ee54ebSWei Chen }
42559ee54ebSWei Chen 
426b4fdde6dSAsias He static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
427b4fdde6dSAsias He {
428b4fdde6dSAsias He 	return writev(ndev->tap_fd, iov, out);
429b4fdde6dSAsias He }
430b4fdde6dSAsias He 
431b4fdde6dSAsias He static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
432b4fdde6dSAsias He {
433b4fdde6dSAsias He 	return readv(ndev->tap_fd, iov, in);
434b4fdde6dSAsias He }
435b4fdde6dSAsias He 
436b4fdde6dSAsias He static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
437b4fdde6dSAsias He {
438b4fdde6dSAsias He 	return uip_tx(iov, out, &ndev->info);
439b4fdde6dSAsias He }
440b4fdde6dSAsias He 
441b4fdde6dSAsias He static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
442b4fdde6dSAsias He {
443b4fdde6dSAsias He 	return uip_rx(iov, in, &ndev->info);
444b4fdde6dSAsias He }
445b4fdde6dSAsias He 
446b4fdde6dSAsias He static struct net_dev_operations tap_ops = {
447b4fdde6dSAsias He 	.rx	= tap_ops_rx,
448b4fdde6dSAsias He 	.tx	= tap_ops_tx,
449b4fdde6dSAsias He };
450b4fdde6dSAsias He 
451b4fdde6dSAsias He static struct net_dev_operations uip_ops = {
452b4fdde6dSAsias He 	.rx	= uip_ops_rx,
453b4fdde6dSAsias He 	.tx	= uip_ops_tx,
454b4fdde6dSAsias He };
455b4fdde6dSAsias He 
456c5ae742bSSasha Levin static u8 *get_config(struct kvm *kvm, void *dev)
45778a2a3e2SSasha Levin {
45878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
45978a2a3e2SSasha Levin 
460c5ae742bSSasha Levin 	return ((u8 *)(&ndev->config));
46178a2a3e2SSasha Levin }
46278a2a3e2SSasha Levin 
463e4730284SMartin Radev static size_t get_config_size(struct kvm *kvm, void *dev)
464e4730284SMartin Radev {
465e4730284SMartin Radev 	struct net_dev *ndev = dev;
466e4730284SMartin Radev 
467e4730284SMartin Radev 	return sizeof(ndev->config);
468e4730284SMartin Radev }
469e4730284SMartin Radev 
47078a2a3e2SSasha Levin static u32 get_host_features(struct kvm *kvm, void *dev)
47178a2a3e2SSasha Levin {
47259ee54ebSWei Chen 	u32 features;
47379052597SSasha Levin 	struct net_dev *ndev = dev;
47479052597SSasha Levin 
47559ee54ebSWei Chen 	features = 1UL << VIRTIO_NET_F_MAC
47678a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_CSUM
47778a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO4
47878a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO6
47978a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
48092c1f37bSSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
481754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_EVENT_IDX
482abd4a801SSasha Levin 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
4836585ffb7SSasha Levin 		| 1UL << VIRTIO_NET_F_CTRL_VQ
4847668c3a6SSasha Levin 		| 1UL << VIRTIO_NET_F_MRG_RXBUF
48579052597SSasha Levin 		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
48659ee54ebSWei Chen 
48759ee54ebSWei Chen 	/*
48859ee54ebSWei Chen 	 * The UFO feature for host and guest only can be enabled when the
48959ee54ebSWei Chen 	 * kernel has TAP UFO support.
49059ee54ebSWei Chen 	 */
49159ee54ebSWei Chen 	if (ndev->tap_ufo)
49259ee54ebSWei Chen 		features |= (1UL << VIRTIO_NET_F_HOST_UFO
49359ee54ebSWei Chen 				| 1UL << VIRTIO_NET_F_GUEST_UFO);
49459ee54ebSWei Chen 
49559ee54ebSWei Chen 	return features;
49678a2a3e2SSasha Levin }
49778a2a3e2SSasha Levin 
49815636d5eSYing-Shiuan Pan static int virtio_net__vhost_set_features(struct net_dev *ndev)
49915636d5eSYing-Shiuan Pan {
50015636d5eSYing-Shiuan Pan 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
50115636d5eSYing-Shiuan Pan 	u64 vhost_features;
50215636d5eSYing-Shiuan Pan 
50315636d5eSYing-Shiuan Pan 	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
50415636d5eSYing-Shiuan Pan 		die_perror("VHOST_GET_FEATURES failed");
50515636d5eSYing-Shiuan Pan 
50615636d5eSYing-Shiuan Pan 	/* make sure both side support mergable rx buffers */
50715636d5eSYing-Shiuan Pan 	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
50815636d5eSYing-Shiuan Pan 			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
50915636d5eSYing-Shiuan Pan 		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;
51015636d5eSYing-Shiuan Pan 
51115636d5eSYing-Shiuan Pan 	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
51215636d5eSYing-Shiuan Pan }
51315636d5eSYing-Shiuan Pan 
51495242e44SJean-Philippe Brucker static void virtio_net_start(struct net_dev *ndev)
51595242e44SJean-Philippe Brucker {
5168aa155c4SYing-Shiuan Pan 	if (ndev->mode == NET_MODE_TAP) {
5178aa155c4SYing-Shiuan Pan 		if (!virtio_net__tap_init(ndev))
51859ee54ebSWei Chen 			die_perror("TAP device initialized failed because");
51959ee54ebSWei Chen 
52015636d5eSYing-Shiuan Pan 		if (ndev->vhost_fd &&
52115636d5eSYing-Shiuan Pan 				virtio_net__vhost_set_features(ndev) != 0)
52215636d5eSYing-Shiuan Pan 			die_perror("VHOST_SET_FEATURES failed");
5238aa155c4SYing-Shiuan Pan 	} else {
5248aa155c4SYing-Shiuan Pan 		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
5258aa155c4SYing-Shiuan Pan 						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
5268aa155c4SYing-Shiuan Pan 						sizeof(struct virtio_net_hdr);
5278aa155c4SYing-Shiuan Pan 		uip_init(&ndev->info);
5288aa155c4SYing-Shiuan Pan 	}
52978a2a3e2SSasha Levin }
53078a2a3e2SSasha Levin 
53149bada43SJean-Philippe Brucker static void virtio_net_stop(struct net_dev *ndev)
53249bada43SJean-Philippe Brucker {
53349bada43SJean-Philippe Brucker 	/* Undo whatever start() did */
53449bada43SJean-Philippe Brucker 	if (ndev->mode == NET_MODE_TAP)
53549bada43SJean-Philippe Brucker 		virtio_net__tap_exit(ndev);
53649bada43SJean-Philippe Brucker 	else
53749bada43SJean-Philippe Brucker 		uip_exit(&ndev->info);
53849bada43SJean-Philippe Brucker }
53949bada43SJean-Philippe Brucker 
540867b15ccSJean-Philippe Brucker static void virtio_net_update_endian(struct net_dev *ndev)
541867b15ccSJean-Philippe Brucker {
542867b15ccSJean-Philippe Brucker 	struct virtio_net_config *conf = &ndev->config;
543867b15ccSJean-Philippe Brucker 
544867b15ccSJean-Philippe Brucker 	conf->status = virtio_host_to_guest_u16(&ndev->vdev,
545867b15ccSJean-Philippe Brucker 						VIRTIO_NET_S_LINK_UP);
546867b15ccSJean-Philippe Brucker 	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
547867b15ccSJean-Philippe Brucker 							     ndev->queue_pairs);
548*8b27bcffSJean-Philippe Brucker 
549*8b27bcffSJean-Philippe Brucker 	/* Let TAP know about vnet header endianness */
550*8b27bcffSJean-Philippe Brucker 	if (ndev->mode == NET_MODE_TAP &&
551*8b27bcffSJean-Philippe Brucker 	    ndev->vdev.endian != VIRTIO_ENDIAN_HOST) {
552*8b27bcffSJean-Philippe Brucker 		int enable_val = 1, disable_val = 0;
553*8b27bcffSJean-Philippe Brucker 		int enable_req, disable_req;
554*8b27bcffSJean-Philippe Brucker 
555*8b27bcffSJean-Philippe Brucker 		if (ndev->vdev.endian == VIRTIO_ENDIAN_LE) {
556*8b27bcffSJean-Philippe Brucker 			enable_req = TUNSETVNETLE;
557*8b27bcffSJean-Philippe Brucker 			disable_req = TUNSETVNETBE;
558*8b27bcffSJean-Philippe Brucker 		} else {
559*8b27bcffSJean-Philippe Brucker 			enable_req = TUNSETVNETBE;
560*8b27bcffSJean-Philippe Brucker 			disable_req = TUNSETVNETLE;
561*8b27bcffSJean-Philippe Brucker 		}
562*8b27bcffSJean-Philippe Brucker 
563*8b27bcffSJean-Philippe Brucker 		ioctl(ndev->tap_fd, disable_req, &disable_val);
564*8b27bcffSJean-Philippe Brucker 		if (ioctl(ndev->tap_fd, enable_req, &enable_val) < 0)
565*8b27bcffSJean-Philippe Brucker 			pr_err("Config tap device TUNSETVNETLE/BE error");
566*8b27bcffSJean-Philippe Brucker 	}
567867b15ccSJean-Philippe Brucker }
568867b15ccSJean-Philippe Brucker 
56995242e44SJean-Philippe Brucker static void notify_status(struct kvm *kvm, void *dev, u32 status)
57095242e44SJean-Philippe Brucker {
571867b15ccSJean-Philippe Brucker 	struct net_dev *ndev = dev;
572867b15ccSJean-Philippe Brucker 
573867b15ccSJean-Philippe Brucker 	if (status & VIRTIO__STATUS_CONFIG)
574867b15ccSJean-Philippe Brucker 		virtio_net_update_endian(ndev);
575867b15ccSJean-Philippe Brucker 
57695242e44SJean-Philippe Brucker 	if (status & VIRTIO__STATUS_START)
57795242e44SJean-Philippe Brucker 		virtio_net_start(dev);
57849bada43SJean-Philippe Brucker 	else if (status & VIRTIO__STATUS_STOP)
57949bada43SJean-Philippe Brucker 		virtio_net_stop(dev);
58095242e44SJean-Philippe Brucker }
58195242e44SJean-Philippe Brucker 
5829e56ec14SSasha Levin static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
5839e56ec14SSasha Levin {
5849e56ec14SSasha Levin 	return vq == (u32)(ndev->queue_pairs * 2);
5859e56ec14SSasha Levin }
5869e56ec14SSasha Levin 
587609ee906SJean-Philippe Brucker static int init_vq(struct kvm *kvm, void *dev, u32 vq)
58878a2a3e2SSasha Levin {
5899ed67cdcSSasha Levin 	struct vhost_vring_state state = { .index = vq };
590ad96e867SJean-Philippe Brucker 	struct net_dev_queue *net_queue;
5919ed67cdcSSasha Levin 	struct vhost_vring_addr addr;
59278a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
59378a2a3e2SSasha Levin 	struct virt_queue *queue;
5949ed67cdcSSasha Levin 	int r;
59578a2a3e2SSasha Levin 
596312c62d1SSasha Levin 	compat__remove_message(compat_id);
59778a2a3e2SSasha Levin 
598ad96e867SJean-Philippe Brucker 	net_queue	= &ndev->queues[vq];
599ad96e867SJean-Philippe Brucker 	net_queue->id	= vq;
600ad96e867SJean-Philippe Brucker 	net_queue->ndev	= ndev;
601ad96e867SJean-Philippe Brucker 	queue		= &net_queue->vq;
602609ee906SJean-Philippe Brucker 	virtio_init_device_vq(kvm, &ndev->vdev, queue, VIRTIO_NET_QUEUE_SIZE);
60378a2a3e2SSasha Levin 
604ad96e867SJean-Philippe Brucker 	mutex_init(&net_queue->lock);
605ad96e867SJean-Philippe Brucker 	pthread_cond_init(&net_queue->cond, NULL);
6069e56ec14SSasha Levin 	if (is_ctrl_vq(ndev, vq)) {
607ad96e867SJean-Philippe Brucker 		pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread,
608ad96e867SJean-Philippe Brucker 			       net_queue);
6099e56ec14SSasha Levin 
6109e56ec14SSasha Levin 		return 0;
6119e56ec14SSasha Levin 	} else if (ndev->vhost_fd == 0 ) {
6129e56ec14SSasha Levin 		if (vq & 1)
613ad96e867SJean-Philippe Brucker 			pthread_create(&net_queue->thread, NULL,
614ad96e867SJean-Philippe Brucker 				       virtio_net_tx_thread, net_queue);
6156585ffb7SSasha Levin 		else
616ad96e867SJean-Philippe Brucker 			pthread_create(&net_queue->thread, NULL,
617ad96e867SJean-Philippe Brucker 				       virtio_net_rx_thread, net_queue);
6186585ffb7SSasha Levin 
61978a2a3e2SSasha Levin 		return 0;
6206585ffb7SSasha Levin 	}
6219ed67cdcSSasha Levin 
6228ed60bbeSMarc Zyngier 	if (queue->endian != VIRTIO_ENDIAN_HOST)
623b960f243SAndre Przywara 		die_perror("VHOST requires the same endianness in guest and host");
6248ed60bbeSMarc Zyngier 
6259ed67cdcSSasha Levin 	state.num = queue->vring.num;
6269ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
6279ed67cdcSSasha Levin 	if (r < 0)
6289ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_NUM failed");
6299ed67cdcSSasha Levin 	state.num = 0;
6309ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
6319ed67cdcSSasha Levin 	if (r < 0)
6329ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_BASE failed");
6339ed67cdcSSasha Levin 
6349ed67cdcSSasha Levin 	addr = (struct vhost_vring_addr) {
6359ed67cdcSSasha Levin 		.index = vq,
6369ed67cdcSSasha Levin 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
6379ed67cdcSSasha Levin 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
6389ed67cdcSSasha Levin 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
6399ed67cdcSSasha Levin 	};
6409ed67cdcSSasha Levin 
6419ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
6429ed67cdcSSasha Levin 	if (r < 0)
6439ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_ADDR failed");
6449ed67cdcSSasha Levin 
6459ed67cdcSSasha Levin 	return 0;
6469ed67cdcSSasha Levin }
6479ed67cdcSSasha Levin 
64849bada43SJean-Philippe Brucker static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
64949bada43SJean-Philippe Brucker {
65049bada43SJean-Philippe Brucker 	struct net_dev *ndev = dev;
65149bada43SJean-Philippe Brucker 	struct net_dev_queue *queue = &ndev->queues[vq];
65249bada43SJean-Philippe Brucker 
65349bada43SJean-Philippe Brucker 	if (!is_ctrl_vq(ndev, vq) && queue->gsi) {
65449bada43SJean-Philippe Brucker 		irq__del_irqfd(kvm, queue->gsi, queue->irqfd);
65549bada43SJean-Philippe Brucker 		close(queue->irqfd);
65649bada43SJean-Philippe Brucker 		queue->gsi = queue->irqfd = 0;
65749bada43SJean-Philippe Brucker 	}
65849bada43SJean-Philippe Brucker 
65949bada43SJean-Philippe Brucker 	/*
66049bada43SJean-Philippe Brucker 	 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but
66149bada43SJean-Philippe Brucker 	 * we can't restart it at the moment.
66249bada43SJean-Philippe Brucker 	 */
66349bada43SJean-Philippe Brucker 	if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) {
66449bada43SJean-Philippe Brucker 		pr_warning("Cannot reset VHOST queue");
66549bada43SJean-Philippe Brucker 		ioctl(ndev->vhost_fd, VHOST_RESET_OWNER);
66649bada43SJean-Philippe Brucker 		return;
66749bada43SJean-Philippe Brucker 	}
66849bada43SJean-Philippe Brucker 
66949bada43SJean-Philippe Brucker 	/*
67049bada43SJean-Philippe Brucker 	 * Threads are waiting on cancellation points (readv or
67149bada43SJean-Philippe Brucker 	 * pthread_cond_wait) and should stop gracefully.
67249bada43SJean-Philippe Brucker 	 */
67349bada43SJean-Philippe Brucker 	pthread_cancel(queue->thread);
67449bada43SJean-Philippe Brucker 	pthread_join(queue->thread, NULL);
67549bada43SJean-Philippe Brucker }
67649bada43SJean-Philippe Brucker 
6779ed67cdcSSasha Levin static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
6789ed67cdcSSasha Levin {
6799ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
680ad96e867SJean-Philippe Brucker 	struct net_dev_queue *queue = &ndev->queues[vq];
6819ed67cdcSSasha Levin 	struct vhost_vring_file file;
6829ed67cdcSSasha Levin 	int r;
6839ed67cdcSSasha Levin 
6849ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
6859ed67cdcSSasha Levin 		return;
6869ed67cdcSSasha Levin 
6879ed67cdcSSasha Levin 	file = (struct vhost_vring_file) {
6889ed67cdcSSasha Levin 		.index	= vq,
689e59679d2SJean-Philippe Brucker 		.fd	= eventfd(0, 0),
6909ed67cdcSSasha Levin 	};
6919ed67cdcSSasha Levin 
692e59679d2SJean-Philippe Brucker 	r = irq__add_irqfd(kvm, gsi, file.fd, -1);
6939ed67cdcSSasha Levin 	if (r < 0)
6949ed67cdcSSasha Levin 		die_perror("KVM_IRQFD failed");
6959ed67cdcSSasha Levin 
69649bada43SJean-Philippe Brucker 	queue->irqfd = file.fd;
69749bada43SJean-Philippe Brucker 	queue->gsi = gsi;
69849bada43SJean-Philippe Brucker 
6999ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
7009ed67cdcSSasha Levin 	if (r < 0)
7019ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_CALL failed");
7029ed67cdcSSasha Levin 	file.fd = ndev->tap_fd;
7039ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
7049ed67cdcSSasha Levin 	if (r != 0)
7059ed67cdcSSasha Levin 		die("VHOST_NET_SET_BACKEND failed %d", errno);
7069ed67cdcSSasha Levin 
7079ed67cdcSSasha Levin }
7089ed67cdcSSasha Levin 
7099ed67cdcSSasha Levin static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
7109ed67cdcSSasha Levin {
7119ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
7129ed67cdcSSasha Levin 	struct vhost_vring_file file = {
7139ed67cdcSSasha Levin 		.index	= vq,
7149ed67cdcSSasha Levin 		.fd	= efd,
7159ed67cdcSSasha Levin 	};
7169ed67cdcSSasha Levin 	int r;
7179ed67cdcSSasha Levin 
7189e56ec14SSasha Levin 	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
7199ed67cdcSSasha Levin 		return;
7209ed67cdcSSasha Levin 
7219ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
7229ed67cdcSSasha Levin 	if (r < 0)
7239ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_KICK failed");
72478a2a3e2SSasha Levin }
72578a2a3e2SSasha Levin 
72678a2a3e2SSasha Levin static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
72778a2a3e2SSasha Levin {
7289a6d73f1SSasha Levin 	struct net_dev *ndev = dev;
7299a6d73f1SSasha Levin 
7309a6d73f1SSasha Levin 	virtio_net_handle_callback(kvm, ndev, vq);
73178a2a3e2SSasha Levin 
73278a2a3e2SSasha Levin 	return 0;
73378a2a3e2SSasha Levin }
73478a2a3e2SSasha Levin 
73553fbb17bSJean-Philippe Brucker static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
73678a2a3e2SSasha Levin {
73778a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
73878a2a3e2SSasha Levin 
739ad96e867SJean-Philippe Brucker 	return &ndev->queues[vq].vq;
74078a2a3e2SSasha Levin }
74178a2a3e2SSasha Levin 
74278a2a3e2SSasha Levin static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
74378a2a3e2SSasha Levin {
7440f4f51a8SAsias He 	/* FIXME: dynamic */
74578a2a3e2SSasha Levin 	return VIRTIO_NET_QUEUE_SIZE;
74678a2a3e2SSasha Levin }
74778a2a3e2SSasha Levin 
7480f4f51a8SAsias He static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
7490f4f51a8SAsias He {
7500f4f51a8SAsias He 	/* FIXME: dynamic */
7510f4f51a8SAsias He 	return size;
7520f4f51a8SAsias He }
7530f4f51a8SAsias He 
75431e0eaccSMartin Radev static unsigned int get_vq_count(struct kvm *kvm, void *dev)
755b98ac591SJean-Philippe Brucker {
756b98ac591SJean-Philippe Brucker 	struct net_dev *ndev = dev;
757b98ac591SJean-Philippe Brucker 
758b98ac591SJean-Philippe Brucker 	return ndev->queue_pairs * 2 + 1;
759b98ac591SJean-Philippe Brucker }
760b98ac591SJean-Philippe Brucker 
76115542babSAndre Przywara static struct virtio_ops net_dev_virtio_ops = {
7621c47ce69SSasha Levin 	.get_config		= get_config,
763e4730284SMartin Radev 	.get_config_size	= get_config_size,
7641c47ce69SSasha Levin 	.get_host_features	= get_host_features,
765b98ac591SJean-Philippe Brucker 	.get_vq_count		= get_vq_count,
7661c47ce69SSasha Levin 	.init_vq		= init_vq,
76749bada43SJean-Philippe Brucker 	.exit_vq		= exit_vq,
76853fbb17bSJean-Philippe Brucker 	.get_vq			= get_vq,
7691c47ce69SSasha Levin 	.get_size_vq		= get_size_vq,
7700f4f51a8SAsias He 	.set_size_vq		= set_size_vq,
7710f4f51a8SAsias He 	.notify_vq		= notify_vq,
7729ed67cdcSSasha Levin 	.notify_vq_gsi		= notify_vq_gsi,
7739ed67cdcSSasha Levin 	.notify_vq_eventfd	= notify_vq_eventfd,
77495242e44SJean-Philippe Brucker 	.notify_status		= notify_status,
7751c47ce69SSasha Levin };
7761c47ce69SSasha Levin 
7779ed67cdcSSasha Levin static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
7789ed67cdcSSasha Levin {
7797f9733c2SAndreas Herrmann 	struct kvm_mem_bank *bank;
7809ed67cdcSSasha Levin 	struct vhost_memory *mem;
7817f9733c2SAndreas Herrmann 	int r, i;
7829ed67cdcSSasha Levin 
7839ed67cdcSSasha Levin 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
7849ed67cdcSSasha Levin 	if (ndev->vhost_fd < 0)
7859ed67cdcSSasha Levin 		die_perror("Failed openning vhost-net device");
7869ed67cdcSSasha Levin 
7877f9733c2SAndreas Herrmann 	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
7889ed67cdcSSasha Levin 	if (mem == NULL)
7899ed67cdcSSasha Levin 		die("Failed allocating memory for vhost memory map");
7909ed67cdcSSasha Levin 
7917f9733c2SAndreas Herrmann 	i = 0;
7927f9733c2SAndreas Herrmann 	list_for_each_entry(bank, &kvm->mem_banks, list) {
7937f9733c2SAndreas Herrmann 		mem->regions[i] = (struct vhost_memory_region) {
7947f9733c2SAndreas Herrmann 			.guest_phys_addr = bank->guest_phys_addr,
7957f9733c2SAndreas Herrmann 			.memory_size	 = bank->size,
7967f9733c2SAndreas Herrmann 			.userspace_addr	 = (unsigned long)bank->host_addr,
7979ed67cdcSSasha Levin 		};
7987f9733c2SAndreas Herrmann 		i++;
7997f9733c2SAndreas Herrmann 	}
8007f9733c2SAndreas Herrmann 	mem->nregions = i;
8019ed67cdcSSasha Levin 
8029ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
8039ed67cdcSSasha Levin 	if (r != 0)
8049ed67cdcSSasha Levin 		die_perror("VHOST_SET_OWNER failed");
8059ed67cdcSSasha Levin 
8069ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
8079ed67cdcSSasha Levin 	if (r != 0)
8089ed67cdcSSasha Levin 		die_perror("VHOST_SET_MEM_TABLE failed");
809627d6874SAsias He 
810627d6874SAsias He 	ndev->vdev.use_vhost = true;
811627d6874SAsias He 
8129ed67cdcSSasha Levin 	free(mem);
8139ed67cdcSSasha Levin }
8149ed67cdcSSasha Levin 
/*
 * Parse a MAC address written as six colon-separated hex octets
 * ("xx:xx:xx:xx:xx:xx") from @str into the six bytes at @mac.
 *
 * The octets are scanned into unsigned char temporaries, which is what %hhx
 * expects, and copied out only when all six were parsed. A NULL or malformed
 * @str therefore leaves @mac untouched rather than partially overwritten.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char octets[6];
	int i;

	if (str == NULL)
		return;

	if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		   &octets[0], &octets[1], &octets[2],
		   &octets[3], &octets[4], &octets[5]) != 6)
		return;

	for (i = 0; i < 6; i++)
		mac[i] = (char)octets[i];
}
8204346fd8fSSasha Levin static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
8214346fd8fSSasha Levin 			const char *param, const char *val)
8225f225124SSasha Levin {
8235f225124SSasha Levin 	if (strcmp(param, "guest_mac") == 0) {
8245f225124SSasha Levin 		str_to_mac(val, p->guest_mac);
8255f225124SSasha Levin 	} else if (strcmp(param, "mode") == 0) {
8265f225124SSasha Levin 		if (!strncmp(val, "user", 4)) {
8275f225124SSasha Levin 			int i;
8285f225124SSasha Levin 
8295f225124SSasha Levin 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
8305f225124SSasha Levin 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
8315f225124SSasha Levin 					die("Only one usermode network device allowed at a time");
8325f225124SSasha Levin 			p->mode = NET_MODE_USER;
8335f225124SSasha Levin 		} else if (!strncmp(val, "tap", 3)) {
8345f225124SSasha Levin 			p->mode = NET_MODE_TAP;
8355f225124SSasha Levin 		} else if (!strncmp(val, "none", 4)) {
8365f225124SSasha Levin 			kvm->cfg.no_net = 1;
8375f225124SSasha Levin 			return -1;
8385f225124SSasha Levin 		} else
8395f225124SSasha Levin 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
8405f225124SSasha Levin 	} else if (strcmp(param, "script") == 0) {
8415f225124SSasha Levin 		p->script = strdup(val);
842eef27ae3SFan Du 	} else if (strcmp(param, "downscript") == 0) {
843eef27ae3SFan Du 		p->downscript = strdup(val);
8445f225124SSasha Levin 	} else if (strcmp(param, "guest_ip") == 0) {
8455f225124SSasha Levin 		p->guest_ip = strdup(val);
8465f225124SSasha Levin 	} else if (strcmp(param, "host_ip") == 0) {
8475f225124SSasha Levin 		p->host_ip = strdup(val);
8485f225124SSasha Levin 	} else if (strcmp(param, "trans") == 0) {
8495f225124SSasha Levin 		p->trans = strdup(val);
8501dfc7c24SMarc Zyngier 	} else if (strcmp(param, "tapif") == 0) {
8511dfc7c24SMarc Zyngier 		p->tapif = strdup(val);
8525f225124SSasha Levin 	} else if (strcmp(param, "vhost") == 0) {
8535f225124SSasha Levin 		p->vhost = atoi(val);
8545f225124SSasha Levin 	} else if (strcmp(param, "fd") == 0) {
8555f225124SSasha Levin 		p->fd = atoi(val);
85679052597SSasha Levin 	} else if (strcmp(param, "mq") == 0) {
85779052597SSasha Levin 		p->mq = atoi(val);
8585f225124SSasha Levin 	} else
8595f225124SSasha Levin 		die("Unknown network parameter %s", param);
8605f225124SSasha Levin 
8615f225124SSasha Levin 	return 0;
8625f225124SSasha Levin }
8635f225124SSasha Levin 
8645f225124SSasha Levin int netdev_parser(const struct option *opt, const char *arg, int unset)
8655f225124SSasha Levin {
8665f225124SSasha Levin 	struct virtio_net_params p;
8675f225124SSasha Levin 	char *buf = NULL, *cmd = NULL, *cur = NULL;
8685f225124SSasha Levin 	bool on_cmd = true;
8695f225124SSasha Levin 	struct kvm *kvm = opt->ptr;
8705f225124SSasha Levin 
8715f225124SSasha Levin 	if (arg) {
8725f225124SSasha Levin 		buf = strdup(arg);
8735f225124SSasha Levin 		if (buf == NULL)
8745f225124SSasha Levin 			die("Failed allocating new net buffer");
8755f225124SSasha Levin 		cur = strtok(buf, ",=");
8765f225124SSasha Levin 	}
8775f225124SSasha Levin 
8785f225124SSasha Levin 	p = (struct virtio_net_params) {
8795f225124SSasha Levin 		.guest_ip	= DEFAULT_GUEST_ADDR,
8805f225124SSasha Levin 		.host_ip	= DEFAULT_HOST_ADDR,
8815f225124SSasha Levin 		.script		= DEFAULT_SCRIPT,
882eef27ae3SFan Du 		.downscript	= DEFAULT_SCRIPT,
8835f225124SSasha Levin 		.mode		= NET_MODE_TAP,
8845f225124SSasha Levin 	};
8855f225124SSasha Levin 
8865f225124SSasha Levin 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
8875f225124SSasha Levin 	p.guest_mac[5] += kvm->cfg.num_net_devices;
8885f225124SSasha Levin 
8895f225124SSasha Levin 	while (cur) {
8905f225124SSasha Levin 		if (on_cmd) {
8915f225124SSasha Levin 			cmd = cur;
8925f225124SSasha Levin 		} else {
8934346fd8fSSasha Levin 			if (set_net_param(kvm, &p, cmd, cur) < 0)
8945f225124SSasha Levin 				goto done;
8955f225124SSasha Levin 		}
8965f225124SSasha Levin 		on_cmd = !on_cmd;
8975f225124SSasha Levin 
8985f225124SSasha Levin 		cur = strtok(NULL, ",=");
8995f225124SSasha Levin 	};
9005f225124SSasha Levin 
9015f225124SSasha Levin 	kvm->cfg.num_net_devices++;
9025f225124SSasha Levin 
9035f225124SSasha Levin 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
9045f225124SSasha Levin 	if (kvm->cfg.net_params == NULL)
9055f225124SSasha Levin 		die("Failed adding new network device");
9065f225124SSasha Levin 
9075f225124SSasha Levin 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
9085f225124SSasha Levin 
9095f225124SSasha Levin done:
9105f225124SSasha Levin 	free(buf);
9115f225124SSasha Levin 	return 0;
9125f225124SSasha Levin }
9135f225124SSasha Levin 
9145f225124SSasha Levin static int virtio_net__init_one(struct virtio_net_params *params)
9154f56d42cSAsias He {
916db927775SAlexandru Elisei 	int i, r;
9179a6d73f1SSasha Levin 	struct net_dev *ndev;
91876a4aac6SWill Deacon 	struct virtio_ops *ops;
919dc7a55d6SSuzuki K. Poulose 	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);
9209a6d73f1SSasha Levin 
9219a6d73f1SSasha Levin 	ndev = calloc(1, sizeof(struct net_dev));
9229a6d73f1SSasha Levin 	if (ndev == NULL)
9235f225124SSasha Levin 		return -ENOMEM;
9249a6d73f1SSasha Levin 
9259a6d73f1SSasha Levin 	list_add_tail(&ndev->list, &ndevs);
9269a6d73f1SSasha Levin 
927db927775SAlexandru Elisei 	ops = malloc(sizeof(*ops));
928db927775SAlexandru Elisei 	if (ops == NULL)
929db927775SAlexandru Elisei 		return -ENOMEM;
930db927775SAlexandru Elisei 
9319a6d73f1SSasha Levin 	ndev->kvm = params->kvm;
9328c0ae74dSSasha Levin 	ndev->params = params;
9339a6d73f1SSasha Levin 
9349a6d73f1SSasha Levin 	mutex_init(&ndev->mutex);
93579052597SSasha Levin 	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
93679052597SSasha Levin 
9370c54698eSAsias He 	for (i = 0 ; i < 6 ; i++) {
9389a6d73f1SSasha Levin 		ndev->config.mac[i]		= params->guest_mac[i];
9399a6d73f1SSasha Levin 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
9409a6d73f1SSasha Levin 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
9410c54698eSAsias He 	}
942f715177dSAsias He 
9439a6d73f1SSasha Levin 	ndev->mode = params->mode;
9449a6d73f1SSasha Levin 	if (ndev->mode == NET_MODE_TAP) {
9459a6d73f1SSasha Levin 		ndev->ops = &tap_ops;
94659ee54ebSWei Chen 		if (!virtio_net__tap_create(ndev))
94759ee54ebSWei Chen 			die_perror("You have requested a TAP device, but creation of one has failed because");
948b4fdde6dSAsias He 	} else {
9499a6d73f1SSasha Levin 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
9509a6d73f1SSasha Levin 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
9519a6d73f1SSasha Levin 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
9529a6d73f1SSasha Levin 		ndev->info.buf_nr		= 20,
9539a6d73f1SSasha Levin 		ndev->ops = &uip_ops;
9545f3aaf22SMarc Zyngier 		uip_static_init(&ndev->info);
955b4fdde6dSAsias He 	}
956b5ee1ea7SAsias He 
95776a4aac6SWill Deacon 	*ops = net_dev_virtio_ops;
958dc7a55d6SSuzuki K. Poulose 
959dc7a55d6SSuzuki K. Poulose 	if (params->trans) {
960dc7a55d6SSuzuki K. Poulose 		if (strcmp(params->trans, "mmio") == 0)
961dc7a55d6SSuzuki K. Poulose 			trans = VIRTIO_MMIO;
962dc7a55d6SSuzuki K. Poulose 		else if (strcmp(params->trans, "pci") == 0)
963dc7a55d6SSuzuki K. Poulose 			trans = VIRTIO_PCI;
96469205aa1SAsias He 		else
965dc7a55d6SSuzuki K. Poulose 			pr_warning("virtio-net: Unknown transport method : %s, "
966dc7a55d6SSuzuki K. Poulose 				   "falling back to %s.", params->trans,
967dc7a55d6SSuzuki K. Poulose 				   virtio_trans_name(trans));
968dc7a55d6SSuzuki K. Poulose 	}
969dc7a55d6SSuzuki K. Poulose 
970db927775SAlexandru Elisei 	r = virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
97176a4aac6SWill Deacon 			PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
972db927775SAlexandru Elisei 	if (r < 0) {
973db927775SAlexandru Elisei 		free(ops);
974db927775SAlexandru Elisei 		return r;
975db927775SAlexandru Elisei 	}
97627ab67f5SSasha Levin 
9779ed67cdcSSasha Levin 	if (params->vhost)
9789ed67cdcSSasha Levin 		virtio_net__vhost_init(params->kvm, ndev);
979cb83de6fSSasha Levin 
980d278197dSAsias He 	if (compat_id == -1)
98152f34d2cSAsias He 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
9825f225124SSasha Levin 
9835f225124SSasha Levin 	return 0;
9845f225124SSasha Levin }
9855f225124SSasha Levin 
9865f225124SSasha Levin int virtio_net__init(struct kvm *kvm)
9875f225124SSasha Levin {
988db927775SAlexandru Elisei 	int i, r;
9895f225124SSasha Levin 
9905f225124SSasha Levin 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
9915f225124SSasha Levin 		kvm->cfg.net_params[i].kvm = kvm;
992db927775SAlexandru Elisei 		r = virtio_net__init_one(&kvm->cfg.net_params[i]);
993db927775SAlexandru Elisei 		if (r < 0)
994db927775SAlexandru Elisei 			goto cleanup;
9955f225124SSasha Levin 	}
9965f225124SSasha Levin 
9975f225124SSasha Levin 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
9988c0ae74dSSasha Levin 		static struct virtio_net_params net_params;
9995f225124SSasha Levin 
10005f225124SSasha Levin 		net_params = (struct virtio_net_params) {
10015f225124SSasha Levin 			.guest_ip	= kvm->cfg.guest_ip,
10025f225124SSasha Levin 			.host_ip	= kvm->cfg.host_ip,
10035f225124SSasha Levin 			.kvm		= kvm,
10045f225124SSasha Levin 			.script		= kvm->cfg.script,
10055f225124SSasha Levin 			.mode		= NET_MODE_USER,
10065f225124SSasha Levin 		};
10075f225124SSasha Levin 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
10085f225124SSasha Levin 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
10095f225124SSasha Levin 
1010db927775SAlexandru Elisei 		r = virtio_net__init_one(&net_params);
1011db927775SAlexandru Elisei 		if (r < 0)
1012db927775SAlexandru Elisei 			goto cleanup;
10135f225124SSasha Levin 	}
10145f225124SSasha Levin 
10155f225124SSasha Levin 	return 0;
1016db927775SAlexandru Elisei 
1017db927775SAlexandru Elisei cleanup:
1018db927775SAlexandru Elisei 	virtio_net__exit(kvm);
1019db927775SAlexandru Elisei 	return r;
10205f225124SSasha Levin }
102149a8afd1SSasha Levin virtio_dev_init(virtio_net__init);
10225f225124SSasha Levin 
10235f225124SSasha Levin int virtio_net__exit(struct kvm *kvm)
10245f225124SSasha Levin {
1025eef27ae3SFan Du 	struct virtio_net_params *params;
1026eef27ae3SFan Du 	struct net_dev *ndev;
1027db927775SAlexandru Elisei 	struct list_head *ptr, *n;
1028eef27ae3SFan Du 
1029db927775SAlexandru Elisei 	list_for_each_safe(ptr, n, &ndevs) {
1030eef27ae3SFan Du 		ndev = list_entry(ptr, struct net_dev, list);
1031eef27ae3SFan Du 		params = ndev->params;
1032eef27ae3SFan Du 		/* Cleanup any tap device which attached to bridge */
1033eef27ae3SFan Du 		if (ndev->mode == NET_MODE_TAP &&
1034eef27ae3SFan Du 		    strcmp(params->downscript, "none"))
1035eef27ae3SFan Du 			virtio_net_exec_script(params->downscript, ndev->tap_name);
1036db927775SAlexandru Elisei 
1037db927775SAlexandru Elisei 		list_del(&ndev->list);
1038db927775SAlexandru Elisei 		free(ndev);
1039eef27ae3SFan Du 	}
10405f225124SSasha Levin 	return 0;
10414f56d42cSAsias He }
104249a8afd1SSasha Levin virtio_dev_exit(virtio_net__exit);
1043