xref: /kvmtool/virtio/net.c (revision f19edd1e9832f14334e7deb13adccbb1928d4124)
131638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
2b5ee1ea7SAsias He #include "kvm/virtio-net.h"
34f56d42cSAsias He #include "kvm/virtio.h"
44f56d42cSAsias He #include "kvm/types.h"
54f56d42cSAsias He #include "kvm/mutex.h"
64f56d42cSAsias He #include "kvm/util.h"
74f56d42cSAsias He #include "kvm/kvm.h"
82449f6e3SSasha Levin #include "kvm/irq.h"
9b5ee1ea7SAsias He #include "kvm/uip.h"
10cb83de6fSSasha Levin #include "kvm/guest_compat.h"
111c47ce69SSasha Levin #include "kvm/virtio-trans.h"
124f56d42cSAsias He 
139ed67cdcSSasha Levin #include <linux/vhost.h>
144f56d42cSAsias He #include <linux/virtio_net.h>
154f56d42cSAsias He #include <linux/if_tun.h>
1678a2a3e2SSasha Levin #include <linux/types.h>
17c229370aSIngo Molnar 
18c229370aSIngo Molnar #include <arpa/inet.h>
194f56d42cSAsias He #include <net/if.h>
20c229370aSIngo Molnar 
21c229370aSIngo Molnar #include <unistd.h>
224f56d42cSAsias He #include <assert.h>
234f56d42cSAsias He #include <fcntl.h>
24c229370aSIngo Molnar 
25cb7202c1SSasha Levin #include <sys/socket.h>
26c229370aSIngo Molnar #include <sys/ioctl.h>
27c229370aSIngo Molnar #include <sys/types.h>
2873b7d038SAmos Kong #include <sys/wait.h>
299ed67cdcSSasha Levin #include <sys/eventfd.h>
304f56d42cSAsias He 
/* Ring size of each virtqueue; also the length of the iovec arrays used by the I/O threads. */
314f56d42cSAsias He #define VIRTIO_NET_QUEUE_SIZE		128
/* Number of entries in net_dev.vqs[]. */
324f56d42cSAsias He #define VIRTIO_NET_NUM_QUEUES		2
/* Index of the receive queue in net_dev.vqs[]. */
334f56d42cSAsias He #define VIRTIO_NET_RX_QUEUE		0
/* Index of the transmit queue in net_dev.vqs[]. */
344f56d42cSAsias He #define VIRTIO_NET_TX_QUEUE		1
354f56d42cSAsias He 
36b4fdde6dSAsias He struct net_dev;
37b4fdde6dSAsias He 
3878a2a3e2SSasha Levin extern struct kvm *kvm;
3978a2a3e2SSasha Levin 
40b4fdde6dSAsias He struct net_dev_operations {
41b4fdde6dSAsias He 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
42b4fdde6dSAsias He 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
43b4fdde6dSAsias He };
44b4fdde6dSAsias He 
458626798bSAsias He struct net_dev {
464f56d42cSAsias He 	pthread_mutex_t			mutex;
471c47ce69SSasha Levin 	struct virtio_trans		vtrans;
489a6d73f1SSasha Levin 	struct list_head		list;
494f56d42cSAsias He 
504f56d42cSAsias He 	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
51c229370aSIngo Molnar 	struct virtio_net_config	config;
5278a2a3e2SSasha Levin 	u32				features;
534f56d42cSAsias He 
54c4aa7c02SPekka Enberg 	pthread_t			io_rx_thread;
55c229370aSIngo Molnar 	pthread_mutex_t			io_rx_lock;
56c4aa7c02SPekka Enberg 	pthread_cond_t			io_rx_cond;
57c4aa7c02SPekka Enberg 
58c4aa7c02SPekka Enberg 	pthread_t			io_tx_thread;
59c229370aSIngo Molnar 	pthread_mutex_t			io_tx_lock;
60c4aa7c02SPekka Enberg 	pthread_cond_t			io_tx_cond;
61c4aa7c02SPekka Enberg 
629ed67cdcSSasha Levin 	int				vhost_fd;
634f56d42cSAsias He 	int				tap_fd;
644f56d42cSAsias He 	char				tap_name[IFNAMSIZ];
65bb1a32f1SAsias He 
66bb1a32f1SAsias He 	int				mode;
67bb1a32f1SAsias He 
68b5ee1ea7SAsias He 	struct uip_info			info;
69b4fdde6dSAsias He 	struct net_dev_operations	*ops;
709a6d73f1SSasha Levin 	struct kvm			*kvm;
714f56d42cSAsias He };
724f56d42cSAsias He 
/* Every net_dev created by virtio_net__init(), appended in creation order. */
739a6d73f1SSasha Levin static LIST_HEAD(ndevs);
/* Id returned by compat__add_message() for the "not detected" hint; starts at -1. */
74312c62d1SSasha Levin static int compat_id = -1;
754f56d42cSAsias He 
76c4aa7c02SPekka Enberg static void *virtio_net_rx_thread(void *p)
774f56d42cSAsias He {
784f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
794f56d42cSAsias He 	struct virt_queue *vq;
8043835ac9SSasha Levin 	struct kvm *kvm;
819a6d73f1SSasha Levin 	struct net_dev *ndev = p;
823fdf659dSSasha Levin 	u16 out, in;
833fdf659dSSasha Levin 	u16 head;
844f56d42cSAsias He 	int len;
854f56d42cSAsias He 
869a6d73f1SSasha Levin 	kvm	= ndev->kvm;
879a6d73f1SSasha Levin 	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
88c4aa7c02SPekka Enberg 
89c4aa7c02SPekka Enberg 	while (1) {
90b5ee1ea7SAsias He 
919a6d73f1SSasha Levin 		mutex_lock(&ndev->io_rx_lock);
92c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
939a6d73f1SSasha Levin 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
949a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_rx_lock);
954f56d42cSAsias He 
964f56d42cSAsias He 		while (virt_queue__available(vq)) {
97b5ee1ea7SAsias He 
9843835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
99b5ee1ea7SAsias He 
1009a6d73f1SSasha Levin 			len = ndev->ops->rx(iov, in, ndev);
101b5ee1ea7SAsias He 
102246c8347SAsias He 			virt_queue__set_used_elem(vq, head, len);
1037f5ffaf5SAsias He 
104c4aa7c02SPekka Enberg 			/* We should interrupt guest right now, otherwise latency is huge. */
10592c1f37bSSasha Levin 			if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE]))
10692c1f37bSSasha Levin 				ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans,
10792c1f37bSSasha Levin 								VIRTIO_NET_RX_QUEUE);
1084f56d42cSAsias He 		}
1094f56d42cSAsias He 
110c4aa7c02SPekka Enberg 	}
111c4aa7c02SPekka Enberg 
112c4aa7c02SPekka Enberg 	pthread_exit(NULL);
113c4aa7c02SPekka Enberg 	return NULL;
114c4aa7c02SPekka Enberg 
115c4aa7c02SPekka Enberg }
116c4aa7c02SPekka Enberg 
117c4aa7c02SPekka Enberg static void *virtio_net_tx_thread(void *p)
1184f56d42cSAsias He {
1194f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
1204f56d42cSAsias He 	struct virt_queue *vq;
12143835ac9SSasha Levin 	struct kvm *kvm;
1229a6d73f1SSasha Levin 	struct net_dev *ndev = p;
1233fdf659dSSasha Levin 	u16 out, in;
1243fdf659dSSasha Levin 	u16 head;
1254f56d42cSAsias He 	int len;
1264f56d42cSAsias He 
1279a6d73f1SSasha Levin 	kvm	= ndev->kvm;
1289a6d73f1SSasha Levin 	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
129c4aa7c02SPekka Enberg 
130c4aa7c02SPekka Enberg 	while (1) {
1319a6d73f1SSasha Levin 		mutex_lock(&ndev->io_tx_lock);
132c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
1339a6d73f1SSasha Levin 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
1349a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_tx_lock);
1354f56d42cSAsias He 
1364f56d42cSAsias He 		while (virt_queue__available(vq)) {
137b5ee1ea7SAsias He 
13843835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
139b5ee1ea7SAsias He 
1409a6d73f1SSasha Levin 			len = ndev->ops->tx(iov, out, ndev);
141b5ee1ea7SAsias He 
1424f56d42cSAsias He 			virt_queue__set_used_elem(vq, head, len);
1434f56d42cSAsias He 		}
1444f56d42cSAsias He 
14592c1f37bSSasha Levin 		if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE]))
1461c47ce69SSasha Levin 			ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE);
1474f56d42cSAsias He 	}
1484f56d42cSAsias He 
149c4aa7c02SPekka Enberg 	pthread_exit(NULL);
150407475bfSPekka Enberg 
151c4aa7c02SPekka Enberg 	return NULL;
152c4aa7c02SPekka Enberg 
153c4aa7c02SPekka Enberg }
154407475bfSPekka Enberg 
1559a6d73f1SSasha Levin static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
1564f56d42cSAsias He {
1579a6d73f1SSasha Levin 	switch (queue) {
158b5ee1ea7SAsias He 	case VIRTIO_NET_TX_QUEUE:
1599a6d73f1SSasha Levin 		mutex_lock(&ndev->io_tx_lock);
1609a6d73f1SSasha Levin 		pthread_cond_signal(&ndev->io_tx_cond);
1619a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_tx_lock);
162407475bfSPekka Enberg 		break;
163b5ee1ea7SAsias He 	case VIRTIO_NET_RX_QUEUE:
1649a6d73f1SSasha Levin 		mutex_lock(&ndev->io_rx_lock);
1659a6d73f1SSasha Levin 		pthread_cond_signal(&ndev->io_rx_cond);
1669a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_rx_lock);
167407475bfSPekka Enberg 		break;
168407475bfSPekka Enberg 	default:
1699a6d73f1SSasha Levin 		pr_warning("Unknown queue index %u", queue);
170c4aa7c02SPekka Enberg 	}
1714f56d42cSAsias He }
1724f56d42cSAsias He 
1739a6d73f1SSasha Levin static bool virtio_net__tap_init(const struct virtio_net_params *params,
1749a6d73f1SSasha Levin 					struct net_dev *ndev)
1754f56d42cSAsias He {
176cb7202c1SSasha Levin 	int sock = socket(AF_INET, SOCK_STREAM, 0);
177f715177dSAsias He 	int pid, status, offload, hdr_len;
178cb7202c1SSasha Levin 	struct sockaddr_in sin = {0};
179246c8347SAsias He 	struct ifreq ifr;
1804f56d42cSAsias He 
181*f19edd1eSSasha Levin 	/* Did the user already gave us the FD? */
182*f19edd1eSSasha Levin 	if (params->fd) {
183*f19edd1eSSasha Levin 		ndev->tap_fd = params->fd;
184*f19edd1eSSasha Levin 		return 1;
185*f19edd1eSSasha Levin 	}
186*f19edd1eSSasha Levin 
1879a6d73f1SSasha Levin 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
1889a6d73f1SSasha Levin 	if (ndev->tap_fd < 0) {
1894542f276SCyrill Gorcunov 		pr_warning("Unable to open /dev/net/tun");
1903b02f580SSasha Levin 		goto fail;
1913b02f580SSasha Levin 	}
1924f56d42cSAsias He 
1934f56d42cSAsias He 	memset(&ifr, 0, sizeof(ifr));
194246c8347SAsias He 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
1959a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
1964542f276SCyrill Gorcunov 		pr_warning("Config tap device error. Are you root?");
1973b02f580SSasha Levin 		goto fail;
1983b02f580SSasha Levin 	}
1994f56d42cSAsias He 
2009a6d73f1SSasha Levin 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
2014f56d42cSAsias He 
2029a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
2034542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETNOCSUM error");
204246c8347SAsias He 		goto fail;
205246c8347SAsias He 	}
206246c8347SAsias He 
207246c8347SAsias He 	hdr_len = sizeof(struct virtio_net_hdr);
2089a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
2094542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
210246c8347SAsias He 
211246c8347SAsias He 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
2129a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
2134542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETOFFLOAD error");
214246c8347SAsias He 		goto fail;
215246c8347SAsias He 	}
2164f56d42cSAsias He 
21773b7d038SAmos Kong 	if (strcmp(params->script, "none")) {
21873b7d038SAmos Kong 		pid = fork();
21973b7d038SAmos Kong 		if (pid == 0) {
2209a6d73f1SSasha Levin 			execl(params->script, params->script, ndev->tap_name, NULL);
22173b7d038SAmos Kong 			_exit(1);
22273b7d038SAmos Kong 		} else {
22373b7d038SAmos Kong 			waitpid(pid, &status, 0);
22473b7d038SAmos Kong 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
2254542f276SCyrill Gorcunov 				pr_warning("Fail to setup tap by %s", params->script);
22673b7d038SAmos Kong 				goto fail;
22773b7d038SAmos Kong 			}
22873b7d038SAmos Kong 		}
22973b7d038SAmos Kong 	} else {
230cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
2319a6d73f1SSasha Levin 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
232bdfcfca6SSasha Levin 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
233cb7202c1SSasha Levin 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
234cb7202c1SSasha Levin 		ifr.ifr_addr.sa_family = AF_INET;
2353b02f580SSasha Levin 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
2364542f276SCyrill Gorcunov 			pr_warning("Could not set ip address on tap device");
2373b02f580SSasha Levin 			goto fail;
2383b02f580SSasha Levin 		}
23973b7d038SAmos Kong 	}
240cb7202c1SSasha Levin 
241cb7202c1SSasha Levin 	memset(&ifr, 0, sizeof(ifr));
2429a6d73f1SSasha Levin 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
243cb7202c1SSasha Levin 	ioctl(sock, SIOCGIFFLAGS, &ifr);
244cb7202c1SSasha Levin 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
245cb7202c1SSasha Levin 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
2464542f276SCyrill Gorcunov 		pr_warning("Could not bring tap device up");
247cb7202c1SSasha Levin 
248cb7202c1SSasha Levin 	close(sock);
2493b02f580SSasha Levin 
2503b02f580SSasha Levin 	return 1;
2513b02f580SSasha Levin 
2523b02f580SSasha Levin fail:
2533b02f580SSasha Levin 	if (sock >= 0)
2543b02f580SSasha Levin 		close(sock);
2559a6d73f1SSasha Levin 	if (ndev->tap_fd >= 0)
2569a6d73f1SSasha Levin 		close(ndev->tap_fd);
2573b02f580SSasha Levin 
2583b02f580SSasha Levin 	return 0;
2594f56d42cSAsias He }
2604f56d42cSAsias He 
2619a6d73f1SSasha Levin static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
262c4aa7c02SPekka Enberg {
26306e5512fSAsias He 	pthread_mutex_init(&ndev->io_tx_lock, NULL);
2649a6d73f1SSasha Levin 	pthread_mutex_init(&ndev->io_rx_lock, NULL);
265c4aa7c02SPekka Enberg 
2669a6d73f1SSasha Levin 	pthread_cond_init(&ndev->io_tx_cond, NULL);
26706e5512fSAsias He 	pthread_cond_init(&ndev->io_rx_cond, NULL);
268c4aa7c02SPekka Enberg 
2699a6d73f1SSasha Levin 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
27006e5512fSAsias He 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
271c4aa7c02SPekka Enberg }
272c4aa7c02SPekka Enberg 
273b4fdde6dSAsias He static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
274b4fdde6dSAsias He {
275b4fdde6dSAsias He 	return writev(ndev->tap_fd, iov, out);
276b4fdde6dSAsias He }
277b4fdde6dSAsias He 
278b4fdde6dSAsias He static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
279b4fdde6dSAsias He {
280b4fdde6dSAsias He 	return readv(ndev->tap_fd, iov, in);
281b4fdde6dSAsias He }
282b4fdde6dSAsias He 
283b4fdde6dSAsias He static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
284b4fdde6dSAsias He {
285b4fdde6dSAsias He 	return uip_tx(iov, out, &ndev->info);
286b4fdde6dSAsias He }
287b4fdde6dSAsias He 
288b4fdde6dSAsias He static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
289b4fdde6dSAsias He {
290b4fdde6dSAsias He 	return uip_rx(iov, in, &ndev->info);
291b4fdde6dSAsias He }
292b4fdde6dSAsias He 
293b4fdde6dSAsias He static struct net_dev_operations tap_ops = {
294b4fdde6dSAsias He 	.rx	= tap_ops_rx,
295b4fdde6dSAsias He 	.tx	= tap_ops_tx,
296b4fdde6dSAsias He };
297b4fdde6dSAsias He 
298b4fdde6dSAsias He static struct net_dev_operations uip_ops = {
299b4fdde6dSAsias He 	.rx	= uip_ops_rx,
300b4fdde6dSAsias He 	.tx	= uip_ops_tx,
301b4fdde6dSAsias He };
302b4fdde6dSAsias He 
30378a2a3e2SSasha Levin static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
30478a2a3e2SSasha Levin {
30578a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
30678a2a3e2SSasha Levin 
30778a2a3e2SSasha Levin 	((u8 *)(&ndev->config))[offset] = data;
30878a2a3e2SSasha Levin }
30978a2a3e2SSasha Levin 
31078a2a3e2SSasha Levin static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
31178a2a3e2SSasha Levin {
31278a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
31378a2a3e2SSasha Levin 
31478a2a3e2SSasha Levin 	return ((u8 *)(&ndev->config))[offset];
31578a2a3e2SSasha Levin }
31678a2a3e2SSasha Levin 
31778a2a3e2SSasha Levin static u32 get_host_features(struct kvm *kvm, void *dev)
31878a2a3e2SSasha Levin {
31978a2a3e2SSasha Levin 	return 1UL << VIRTIO_NET_F_MAC
32078a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_CSUM
32178a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_UFO
32278a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO4
32378a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO6
32478a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_UFO
32578a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
32692c1f37bSSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
327754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_EVENT_IDX
328754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC;
32978a2a3e2SSasha Levin }
33078a2a3e2SSasha Levin 
33178a2a3e2SSasha Levin static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
33278a2a3e2SSasha Levin {
33378a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
33478a2a3e2SSasha Levin 
33578a2a3e2SSasha Levin 	ndev->features = features;
33678a2a3e2SSasha Levin }
33778a2a3e2SSasha Levin 
33878a2a3e2SSasha Levin static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
33978a2a3e2SSasha Levin {
3409ed67cdcSSasha Levin 	struct vhost_vring_state state = { .index = vq };
3419ed67cdcSSasha Levin 	struct vhost_vring_addr addr;
34278a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
34378a2a3e2SSasha Levin 	struct virt_queue *queue;
34478a2a3e2SSasha Levin 	void *p;
3459ed67cdcSSasha Levin 	int r;
34678a2a3e2SSasha Levin 
347312c62d1SSasha Levin 	compat__remove_message(compat_id);
34878a2a3e2SSasha Levin 
34978a2a3e2SSasha Levin 	queue			= &ndev->vqs[vq];
35078a2a3e2SSasha Levin 	queue->pfn		= pfn;
35178a2a3e2SSasha Levin 	p			= guest_pfn_to_host(kvm, queue->pfn);
35278a2a3e2SSasha Levin 
35378a2a3e2SSasha Levin 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
35478a2a3e2SSasha Levin 
3559ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
35678a2a3e2SSasha Levin 		return 0;
3579ed67cdcSSasha Levin 
3589ed67cdcSSasha Levin 	state.num = queue->vring.num;
3599ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
3609ed67cdcSSasha Levin 	if (r < 0)
3619ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_NUM failed");
3629ed67cdcSSasha Levin 	state.num = 0;
3639ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
3649ed67cdcSSasha Levin 	if (r < 0)
3659ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_BASE failed");
3669ed67cdcSSasha Levin 
3679ed67cdcSSasha Levin 	addr = (struct vhost_vring_addr) {
3689ed67cdcSSasha Levin 		.index = vq,
3699ed67cdcSSasha Levin 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
3709ed67cdcSSasha Levin 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
3719ed67cdcSSasha Levin 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
3729ed67cdcSSasha Levin 	};
3739ed67cdcSSasha Levin 
3749ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
3759ed67cdcSSasha Levin 	if (r < 0)
3769ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_ADDR failed");
3779ed67cdcSSasha Levin 
3789ed67cdcSSasha Levin 	return 0;
3799ed67cdcSSasha Levin }
3809ed67cdcSSasha Levin 
3819ed67cdcSSasha Levin static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
3829ed67cdcSSasha Levin {
3839ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
3849ed67cdcSSasha Levin 	struct kvm_irqfd irq;
3859ed67cdcSSasha Levin 	struct vhost_vring_file file;
3869ed67cdcSSasha Levin 	int r;
3879ed67cdcSSasha Levin 
3889ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
3899ed67cdcSSasha Levin 		return;
3909ed67cdcSSasha Levin 
3919ed67cdcSSasha Levin 	irq = (struct kvm_irqfd) {
3929ed67cdcSSasha Levin 		.gsi	= gsi,
3939ed67cdcSSasha Levin 		.fd	= eventfd(0, 0),
3949ed67cdcSSasha Levin 	};
3959ed67cdcSSasha Levin 	file = (struct vhost_vring_file) {
3969ed67cdcSSasha Levin 		.index	= vq,
3979ed67cdcSSasha Levin 		.fd	= irq.fd,
3989ed67cdcSSasha Levin 	};
3999ed67cdcSSasha Levin 
4009ed67cdcSSasha Levin 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
4019ed67cdcSSasha Levin 	if (r < 0)
4029ed67cdcSSasha Levin 		die_perror("KVM_IRQFD failed");
4039ed67cdcSSasha Levin 
4049ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
4059ed67cdcSSasha Levin 	if (r < 0)
4069ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_CALL failed");
4079ed67cdcSSasha Levin 	file.fd = ndev->tap_fd;
4089ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
4099ed67cdcSSasha Levin 	if (r != 0)
4109ed67cdcSSasha Levin 		die("VHOST_NET_SET_BACKEND failed %d", errno);
4119ed67cdcSSasha Levin 
4129ed67cdcSSasha Levin }
4139ed67cdcSSasha Levin 
4149ed67cdcSSasha Levin static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
4159ed67cdcSSasha Levin {
4169ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
4179ed67cdcSSasha Levin 	struct vhost_vring_file file = {
4189ed67cdcSSasha Levin 		.index	= vq,
4199ed67cdcSSasha Levin 		.fd	= efd,
4209ed67cdcSSasha Levin 	};
4219ed67cdcSSasha Levin 	int r;
4229ed67cdcSSasha Levin 
4239ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
4249ed67cdcSSasha Levin 		return;
4259ed67cdcSSasha Levin 
4269ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
4279ed67cdcSSasha Levin 	if (r < 0)
4289ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_KICK failed");
42978a2a3e2SSasha Levin }
43078a2a3e2SSasha Levin 
43178a2a3e2SSasha Levin static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
43278a2a3e2SSasha Levin {
4339a6d73f1SSasha Levin 	struct net_dev *ndev = dev;
4349a6d73f1SSasha Levin 
4359a6d73f1SSasha Levin 	virtio_net_handle_callback(kvm, ndev, vq);
43678a2a3e2SSasha Levin 
43778a2a3e2SSasha Levin 	return 0;
43878a2a3e2SSasha Levin }
43978a2a3e2SSasha Levin 
44078a2a3e2SSasha Levin static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
44178a2a3e2SSasha Levin {
44278a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
44378a2a3e2SSasha Levin 
44478a2a3e2SSasha Levin 	return ndev->vqs[vq].pfn;
44578a2a3e2SSasha Levin }
44678a2a3e2SSasha Levin 
44778a2a3e2SSasha Levin static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
44878a2a3e2SSasha Levin {
44978a2a3e2SSasha Levin 	return VIRTIO_NET_QUEUE_SIZE;
45078a2a3e2SSasha Levin }
45178a2a3e2SSasha Levin 
4521c47ce69SSasha Levin static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
4531c47ce69SSasha Levin 	.set_config		= set_config,
4541c47ce69SSasha Levin 	.get_config		= get_config,
4551c47ce69SSasha Levin 	.get_host_features	= get_host_features,
4561c47ce69SSasha Levin 	.set_guest_features	= set_guest_features,
4571c47ce69SSasha Levin 	.init_vq		= init_vq,
4581c47ce69SSasha Levin 	.notify_vq		= notify_vq,
4591c47ce69SSasha Levin 	.get_pfn_vq		= get_pfn_vq,
4601c47ce69SSasha Levin 	.get_size_vq		= get_size_vq,
4619ed67cdcSSasha Levin 	.notify_vq_gsi		= notify_vq_gsi,
4629ed67cdcSSasha Levin 	.notify_vq_eventfd	= notify_vq_eventfd,
4631c47ce69SSasha Levin };
4641c47ce69SSasha Levin 
4659ed67cdcSSasha Levin static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
4669ed67cdcSSasha Levin {
46792c1f37bSSasha Levin 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
4689ed67cdcSSasha Levin 	struct vhost_memory *mem;
4699ed67cdcSSasha Levin 	int r;
4709ed67cdcSSasha Levin 
4719ed67cdcSSasha Levin 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
4729ed67cdcSSasha Levin 	if (ndev->vhost_fd < 0)
4739ed67cdcSSasha Levin 		die_perror("Failed openning vhost-net device");
4749ed67cdcSSasha Levin 
4759ed67cdcSSasha Levin 	mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region));
4769ed67cdcSSasha Levin 	if (mem == NULL)
4779ed67cdcSSasha Levin 		die("Failed allocating memory for vhost memory map");
4789ed67cdcSSasha Levin 
4799ed67cdcSSasha Levin 	mem->nregions = 1;
4809ed67cdcSSasha Levin 	mem->regions[0] = (struct vhost_memory_region) {
4819ed67cdcSSasha Levin 		.guest_phys_addr	= 0,
4829ed67cdcSSasha Levin 		.memory_size		= kvm->ram_size,
483b4108023SAsias He 		.userspace_addr		= (unsigned long)kvm->ram_start,
4849ed67cdcSSasha Levin 	};
4859ed67cdcSSasha Levin 
4869ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
4879ed67cdcSSasha Levin 	if (r != 0)
4889ed67cdcSSasha Levin 		die_perror("VHOST_SET_OWNER failed");
4899ed67cdcSSasha Levin 
4909ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
4919ed67cdcSSasha Levin 	if (r != 0)
4929ed67cdcSSasha Levin 		die_perror("VHOST_SET_FEATURES failed");
4939ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
4949ed67cdcSSasha Levin 	if (r != 0)
4959ed67cdcSSasha Levin 		die_perror("VHOST_SET_MEM_TABLE failed");
4969ed67cdcSSasha Levin 	free(mem);
4979ed67cdcSSasha Levin }
4989ed67cdcSSasha Levin 
4999a6d73f1SSasha Levin void virtio_net__init(const struct virtio_net_params *params)
5004f56d42cSAsias He {
501b5ee1ea7SAsias He 	int i;
5029a6d73f1SSasha Levin 	struct net_dev *ndev;
5039a6d73f1SSasha Levin 
5049a6d73f1SSasha Levin 	if (!params)
5059a6d73f1SSasha Levin 		return;
5069a6d73f1SSasha Levin 
5079a6d73f1SSasha Levin 	ndev = calloc(1, sizeof(struct net_dev));
5089a6d73f1SSasha Levin 	if (ndev == NULL)
5099a6d73f1SSasha Levin 		die("Failed allocating ndev");
5109a6d73f1SSasha Levin 
5119a6d73f1SSasha Levin 	list_add_tail(&ndev->list, &ndevs);
5129a6d73f1SSasha Levin 
5139a6d73f1SSasha Levin 	ndev->kvm = params->kvm;
5149a6d73f1SSasha Levin 
5159a6d73f1SSasha Levin 	mutex_init(&ndev->mutex);
5169a6d73f1SSasha Levin 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
5172449f6e3SSasha Levin 
5180c54698eSAsias He 	for (i = 0 ; i < 6 ; i++) {
5199a6d73f1SSasha Levin 		ndev->config.mac[i]		= params->guest_mac[i];
5209a6d73f1SSasha Levin 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
5219a6d73f1SSasha Levin 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
5220c54698eSAsias He 	}
523f715177dSAsias He 
5249a6d73f1SSasha Levin 	ndev->mode = params->mode;
5259a6d73f1SSasha Levin 	if (ndev->mode == NET_MODE_TAP) {
5269a6d73f1SSasha Levin 		if (!virtio_net__tap_init(params, ndev))
5279a6d73f1SSasha Levin 			die_perror("You have requested a TAP device, but creation of one has"
5289a6d73f1SSasha Levin 					"failed because:");
5299a6d73f1SSasha Levin 		ndev->ops = &tap_ops;
530b4fdde6dSAsias He 	} else {
5319a6d73f1SSasha Levin 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
5329a6d73f1SSasha Levin 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
5339a6d73f1SSasha Levin 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
5349a6d73f1SSasha Levin 		ndev->info.buf_nr		= 20,
5359a6d73f1SSasha Levin 		uip_init(&ndev->info);
5369a6d73f1SSasha Levin 		ndev->ops = &uip_ops;
537b4fdde6dSAsias He 	}
538b5ee1ea7SAsias He 
5391c47ce69SSasha Levin 	virtio_trans_init(&ndev->vtrans, VIRTIO_PCI);
5401c47ce69SSasha Levin 	ndev->vtrans.trans_ops->init(kvm, &ndev->vtrans, ndev, PCI_DEVICE_ID_VIRTIO_NET,
5411c47ce69SSasha Levin 					VIRTIO_ID_NET, PCI_CLASS_NET);
5421c47ce69SSasha Levin 	ndev->vtrans.virtio_ops = &net_dev_virtio_ops;
54327ab67f5SSasha Levin 
5449ed67cdcSSasha Levin 	if (params->vhost)
5459ed67cdcSSasha Levin 		virtio_net__vhost_init(params->kvm, ndev);
5469ed67cdcSSasha Levin 	else
5479a6d73f1SSasha Levin 		virtio_net__io_thread_init(params->kvm, ndev);
548cb83de6fSSasha Levin 
549312c62d1SSasha Levin 	if (compat_id != -1)
550312c62d1SSasha Levin 		compat_id = compat__add_message("virtio-net device was not detected",
551cb83de6fSSasha Levin 						"While you have requested a virtio-net device, "
552fc835ab3SSasha Levin 						"the guest kernel did not initialize it.\n"
553fc835ab3SSasha Levin 						"Please make sure that the guest kernel was "
554fc835ab3SSasha Levin 						"compiled with CONFIG_VIRTIO_NET=y enabled "
555fc835ab3SSasha Levin 						"in its .config");
5564f56d42cSAsias He }
557