xref: /kvmtool/virtio/net.c (revision d3476f7d3bb7cee0b620cf207c168cb4f5b5c41e)
131638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
2b5ee1ea7SAsias He #include "kvm/virtio-net.h"
34f56d42cSAsias He #include "kvm/virtio.h"
44f56d42cSAsias He #include "kvm/types.h"
54f56d42cSAsias He #include "kvm/mutex.h"
64f56d42cSAsias He #include "kvm/util.h"
74f56d42cSAsias He #include "kvm/kvm.h"
82449f6e3SSasha Levin #include "kvm/irq.h"
9b5ee1ea7SAsias He #include "kvm/uip.h"
10cb83de6fSSasha Levin #include "kvm/guest_compat.h"
114f56d42cSAsias He 
129ed67cdcSSasha Levin #include <linux/vhost.h>
134f56d42cSAsias He #include <linux/virtio_net.h>
144f56d42cSAsias He #include <linux/if_tun.h>
1578a2a3e2SSasha Levin #include <linux/types.h>
16c229370aSIngo Molnar 
17c229370aSIngo Molnar #include <arpa/inet.h>
184f56d42cSAsias He #include <net/if.h>
19c229370aSIngo Molnar 
20c229370aSIngo Molnar #include <unistd.h>
214f56d42cSAsias He #include <fcntl.h>
22c229370aSIngo Molnar 
23cb7202c1SSasha Levin #include <sys/socket.h>
24c229370aSIngo Molnar #include <sys/ioctl.h>
25c229370aSIngo Molnar #include <sys/types.h>
2673b7d038SAmos Kong #include <sys/wait.h>
279ed67cdcSSasha Levin #include <sys/eventfd.h>
284f56d42cSAsias He 
/*
 * Virtqueue geometry of the device: two queues (index 0 receives, index 1
 * transmits), each VIRTIO_NET_QUEUE_SIZE entries deep.
 */
enum {
	VIRTIO_NET_QUEUE_SIZE	= 256,
	VIRTIO_NET_NUM_QUEUES	= 2,
	VIRTIO_NET_RX_QUEUE	= 0,
	VIRTIO_NET_TX_QUEUE	= 1,
};
334f56d42cSAsias He 
34b4fdde6dSAsias He struct net_dev;
35b4fdde6dSAsias He 
36b4fdde6dSAsias He struct net_dev_operations {
37b4fdde6dSAsias He 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
38b4fdde6dSAsias He 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
39b4fdde6dSAsias He };
40b4fdde6dSAsias He 
/* Per-device state of one emulated virtio-net NIC. */
struct net_dev {
	struct mutex			mutex;		/* initialised in virtio_net__init_one() */
	struct virtio_device		vdev;		/* transport handle passed to virtio_init() */
	struct list_head		list;		/* link in the file-level ndevs list */

	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
	struct virtio_net_config	config;		/* returned to the transport by get_config() */
	u32				features;	/* stored by set_guest_features() */

	/* RX worker: sleeps on io_rx_cond until notified for the RX queue */
	pthread_t			io_rx_thread;
	struct mutex			io_rx_lock;
	pthread_cond_t			io_rx_cond;

	/* TX worker: sleeps on io_tx_cond until notified for the TX queue */
	pthread_t			io_tx_thread;
	struct mutex			io_tx_lock;
	pthread_cond_t			io_tx_cond;

	int				vhost_fd;	/* 0 means vhost is not in use */
	int				tap_fd;
	char				tap_name[IFNAMSIZ];

	int				mode;		/* copied from params->mode (NET_MODE_*) */

	struct uip_info			info;		/* state handed to uip_init()/uip_rx()/uip_tx() */
	struct net_dev_operations	*ops;		/* &tap_ops or &uip_ops, chosen by mode */
	struct kvm			*kvm;
};
684f56d42cSAsias He 
/* Every net_dev created by virtio_net__init_one() is appended to this list. */
static LIST_HEAD(ndevs);
/* Id of the compat message registered once for virtio-net; -1 until registered. */
static int compat_id = -1;
714f56d42cSAsias He 
72c4aa7c02SPekka Enberg static void *virtio_net_rx_thread(void *p)
734f56d42cSAsias He {
744f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
754f56d42cSAsias He 	struct virt_queue *vq;
7643835ac9SSasha Levin 	struct kvm *kvm;
779a6d73f1SSasha Levin 	struct net_dev *ndev = p;
783fdf659dSSasha Levin 	u16 out, in;
793fdf659dSSasha Levin 	u16 head;
804f56d42cSAsias He 	int len;
814f56d42cSAsias He 
82a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-rx");
83a4d8c55eSSasha Levin 
849a6d73f1SSasha Levin 	kvm = ndev->kvm;
859a6d73f1SSasha Levin 	vq = &ndev->vqs[VIRTIO_NET_RX_QUEUE];
86c4aa7c02SPekka Enberg 
87c4aa7c02SPekka Enberg 	while (1) {
889a6d73f1SSasha Levin 		mutex_lock(&ndev->io_rx_lock);
89c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
90*d3476f7dSSasha Levin 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock.mutex);
919a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_rx_lock);
924f56d42cSAsias He 
934f56d42cSAsias He 		while (virt_queue__available(vq)) {
9443835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
959a6d73f1SSasha Levin 			len = ndev->ops->rx(iov, in, ndev);
96246c8347SAsias He 			virt_queue__set_used_elem(vq, head, len);
977f5ffaf5SAsias He 
98c4aa7c02SPekka Enberg 			/* We should interrupt guest right now, otherwise latency is huge. */
9992c1f37bSSasha Levin 			if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE]))
10002eca50cSAsias He 				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev,
10192c1f37bSSasha Levin 							   VIRTIO_NET_RX_QUEUE);
1024f56d42cSAsias He 		}
103c4aa7c02SPekka Enberg 	}
104c4aa7c02SPekka Enberg 
105c4aa7c02SPekka Enberg 	pthread_exit(NULL);
106c4aa7c02SPekka Enberg 	return NULL;
107c4aa7c02SPekka Enberg 
108c4aa7c02SPekka Enberg }
109c4aa7c02SPekka Enberg 
110c4aa7c02SPekka Enberg static void *virtio_net_tx_thread(void *p)
1114f56d42cSAsias He {
1124f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
1134f56d42cSAsias He 	struct virt_queue *vq;
11443835ac9SSasha Levin 	struct kvm *kvm;
1159a6d73f1SSasha Levin 	struct net_dev *ndev = p;
1163fdf659dSSasha Levin 	u16 out, in;
1173fdf659dSSasha Levin 	u16 head;
1184f56d42cSAsias He 	int len;
1194f56d42cSAsias He 
120a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-tx");
121a4d8c55eSSasha Levin 
1229a6d73f1SSasha Levin 	kvm = ndev->kvm;
1239a6d73f1SSasha Levin 	vq = &ndev->vqs[VIRTIO_NET_TX_QUEUE];
124c4aa7c02SPekka Enberg 
125c4aa7c02SPekka Enberg 	while (1) {
1269a6d73f1SSasha Levin 		mutex_lock(&ndev->io_tx_lock);
127c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
128*d3476f7dSSasha Levin 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock.mutex);
1299a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_tx_lock);
1304f56d42cSAsias He 
1314f56d42cSAsias He 		while (virt_queue__available(vq)) {
13243835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1339a6d73f1SSasha Levin 			len = ndev->ops->tx(iov, out, ndev);
1344f56d42cSAsias He 			virt_queue__set_used_elem(vq, head, len);
1354f56d42cSAsias He 		}
1364f56d42cSAsias He 
13792c1f37bSSasha Levin 		if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE]))
13802eca50cSAsias He 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, VIRTIO_NET_TX_QUEUE);
1394f56d42cSAsias He 	}
1404f56d42cSAsias He 
141c4aa7c02SPekka Enberg 	pthread_exit(NULL);
142407475bfSPekka Enberg 
143c4aa7c02SPekka Enberg 	return NULL;
144c4aa7c02SPekka Enberg 
145c4aa7c02SPekka Enberg }
146407475bfSPekka Enberg 
1479a6d73f1SSasha Levin static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
1484f56d42cSAsias He {
1499a6d73f1SSasha Levin 	switch (queue) {
150b5ee1ea7SAsias He 	case VIRTIO_NET_TX_QUEUE:
1519a6d73f1SSasha Levin 		mutex_lock(&ndev->io_tx_lock);
1529a6d73f1SSasha Levin 		pthread_cond_signal(&ndev->io_tx_cond);
1539a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_tx_lock);
154407475bfSPekka Enberg 		break;
155b5ee1ea7SAsias He 	case VIRTIO_NET_RX_QUEUE:
1569a6d73f1SSasha Levin 		mutex_lock(&ndev->io_rx_lock);
1579a6d73f1SSasha Levin 		pthread_cond_signal(&ndev->io_rx_cond);
1589a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_rx_lock);
159407475bfSPekka Enberg 		break;
160407475bfSPekka Enberg 	default:
1619a6d73f1SSasha Levin 		pr_warning("Unknown queue index %u", queue);
162c4aa7c02SPekka Enberg 	}
1634f56d42cSAsias He }
1644f56d42cSAsias He 
1659a6d73f1SSasha Levin static bool virtio_net__tap_init(const struct virtio_net_params *params,
1669a6d73f1SSasha Levin 					struct net_dev *ndev)
1674f56d42cSAsias He {
168cb7202c1SSasha Levin 	int sock = socket(AF_INET, SOCK_STREAM, 0);
169f715177dSAsias He 	int pid, status, offload, hdr_len;
170cb7202c1SSasha Levin 	struct sockaddr_in sin = {0};
171246c8347SAsias He 	struct ifreq ifr;
1724f56d42cSAsias He 
173f19edd1eSSasha Levin 	/* Did the user already gave us the FD? */
174f19edd1eSSasha Levin 	if (params->fd) {
175f19edd1eSSasha Levin 		ndev->tap_fd = params->fd;
176f19edd1eSSasha Levin 		return 1;
177f19edd1eSSasha Levin 	}
178f19edd1eSSasha Levin 
1799a6d73f1SSasha Levin 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
1809a6d73f1SSasha Levin 	if (ndev->tap_fd < 0) {
1814542f276SCyrill Gorcunov 		pr_warning("Unable to open /dev/net/tun");
1823b02f580SSasha Levin 		goto fail;
1833b02f580SSasha Levin 	}
1844f56d42cSAsias He 
1854f56d42cSAsias He 	memset(&ifr, 0, sizeof(ifr));
186246c8347SAsias He 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
1879a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
1884542f276SCyrill Gorcunov 		pr_warning("Config tap device error. Are you root?");
1893b02f580SSasha Levin 		goto fail;
1903b02f580SSasha Levin 	}
1914f56d42cSAsias He 
1929a6d73f1SSasha Levin 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
1934f56d42cSAsias He 
1949a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
1954542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETNOCSUM error");
196246c8347SAsias He 		goto fail;
197246c8347SAsias He 	}
198246c8347SAsias He 
199246c8347SAsias He 	hdr_len = sizeof(struct virtio_net_hdr);
2009a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
2014542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
202246c8347SAsias He 
203246c8347SAsias He 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
2049a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
2054542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETOFFLOAD error");
206246c8347SAsias He 		goto fail;
207246c8347SAsias He 	}
2084f56d42cSAsias He 
20973b7d038SAmos Kong 	if (strcmp(params->script, "none")) {
21073b7d038SAmos Kong 		pid = fork();
21173b7d038SAmos Kong 		if (pid == 0) {
2129a6d73f1SSasha Levin 			execl(params->script, params->script, ndev->tap_name, NULL);
21373b7d038SAmos Kong 			_exit(1);
21473b7d038SAmos Kong 		} else {
21573b7d038SAmos Kong 			waitpid(pid, &status, 0);
21673b7d038SAmos Kong 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
2174542f276SCyrill Gorcunov 				pr_warning("Fail to setup tap by %s", params->script);
21873b7d038SAmos Kong 				goto fail;
21973b7d038SAmos Kong 			}
22073b7d038SAmos Kong 		}
22173b7d038SAmos Kong 	} else {
222cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
2239a6d73f1SSasha Levin 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
224bdfcfca6SSasha Levin 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
225cb7202c1SSasha Levin 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
226cb7202c1SSasha Levin 		ifr.ifr_addr.sa_family = AF_INET;
2273b02f580SSasha Levin 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
2284542f276SCyrill Gorcunov 			pr_warning("Could not set ip address on tap device");
2293b02f580SSasha Levin 			goto fail;
2303b02f580SSasha Levin 		}
23173b7d038SAmos Kong 	}
232cb7202c1SSasha Levin 
233cb7202c1SSasha Levin 	memset(&ifr, 0, sizeof(ifr));
2349a6d73f1SSasha Levin 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
235cb7202c1SSasha Levin 	ioctl(sock, SIOCGIFFLAGS, &ifr);
236cb7202c1SSasha Levin 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
237cb7202c1SSasha Levin 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
2384542f276SCyrill Gorcunov 		pr_warning("Could not bring tap device up");
239cb7202c1SSasha Levin 
240cb7202c1SSasha Levin 	close(sock);
2413b02f580SSasha Levin 
2423b02f580SSasha Levin 	return 1;
2433b02f580SSasha Levin 
2443b02f580SSasha Levin fail:
2453b02f580SSasha Levin 	if (sock >= 0)
2463b02f580SSasha Levin 		close(sock);
2479a6d73f1SSasha Levin 	if (ndev->tap_fd >= 0)
2489a6d73f1SSasha Levin 		close(ndev->tap_fd);
2493b02f580SSasha Levin 
2503b02f580SSasha Levin 	return 0;
2514f56d42cSAsias He }
2524f56d42cSAsias He 
2539a6d73f1SSasha Levin static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
254c4aa7c02SPekka Enberg {
255*d3476f7dSSasha Levin 	mutex_init(&ndev->io_tx_lock);
256*d3476f7dSSasha Levin 	mutex_init(&ndev->io_rx_lock);
257c4aa7c02SPekka Enberg 
2589a6d73f1SSasha Levin 	pthread_cond_init(&ndev->io_tx_cond, NULL);
25906e5512fSAsias He 	pthread_cond_init(&ndev->io_rx_cond, NULL);
260c4aa7c02SPekka Enberg 
2619a6d73f1SSasha Levin 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
26206e5512fSAsias He 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
263c4aa7c02SPekka Enberg }
264c4aa7c02SPekka Enberg 
265b4fdde6dSAsias He static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
266b4fdde6dSAsias He {
267b4fdde6dSAsias He 	return writev(ndev->tap_fd, iov, out);
268b4fdde6dSAsias He }
269b4fdde6dSAsias He 
270b4fdde6dSAsias He static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
271b4fdde6dSAsias He {
272b4fdde6dSAsias He 	return readv(ndev->tap_fd, iov, in);
273b4fdde6dSAsias He }
274b4fdde6dSAsias He 
275b4fdde6dSAsias He static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
276b4fdde6dSAsias He {
277b4fdde6dSAsias He 	return uip_tx(iov, out, &ndev->info);
278b4fdde6dSAsias He }
279b4fdde6dSAsias He 
280b4fdde6dSAsias He static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
281b4fdde6dSAsias He {
282b4fdde6dSAsias He 	return uip_rx(iov, in, &ndev->info);
283b4fdde6dSAsias He }
284b4fdde6dSAsias He 
285b4fdde6dSAsias He static struct net_dev_operations tap_ops = {
286b4fdde6dSAsias He 	.rx	= tap_ops_rx,
287b4fdde6dSAsias He 	.tx	= tap_ops_tx,
288b4fdde6dSAsias He };
289b4fdde6dSAsias He 
290b4fdde6dSAsias He static struct net_dev_operations uip_ops = {
291b4fdde6dSAsias He 	.rx	= uip_ops_rx,
292b4fdde6dSAsias He 	.tx	= uip_ops_tx,
293b4fdde6dSAsias He };
294b4fdde6dSAsias He 
295c5ae742bSSasha Levin static u8 *get_config(struct kvm *kvm, void *dev)
29678a2a3e2SSasha Levin {
29778a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
29878a2a3e2SSasha Levin 
299c5ae742bSSasha Levin 	return ((u8 *)(&ndev->config));
30078a2a3e2SSasha Levin }
30178a2a3e2SSasha Levin 
30278a2a3e2SSasha Levin static u32 get_host_features(struct kvm *kvm, void *dev)
30378a2a3e2SSasha Levin {
30478a2a3e2SSasha Levin 	return 1UL << VIRTIO_NET_F_MAC
30578a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_CSUM
30678a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_UFO
30778a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO4
30878a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO6
30978a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_UFO
31078a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
31192c1f37bSSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
312754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_EVENT_IDX
313754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC;
31478a2a3e2SSasha Levin }
31578a2a3e2SSasha Levin 
31678a2a3e2SSasha Levin static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
31778a2a3e2SSasha Levin {
31878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
31978a2a3e2SSasha Levin 
32078a2a3e2SSasha Levin 	ndev->features = features;
32178a2a3e2SSasha Levin }
32278a2a3e2SSasha Levin 
32378a2a3e2SSasha Levin static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
32478a2a3e2SSasha Levin {
3259ed67cdcSSasha Levin 	struct vhost_vring_state state = { .index = vq };
3269ed67cdcSSasha Levin 	struct vhost_vring_addr addr;
32778a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
32878a2a3e2SSasha Levin 	struct virt_queue *queue;
32978a2a3e2SSasha Levin 	void *p;
3309ed67cdcSSasha Levin 	int r;
33178a2a3e2SSasha Levin 
332312c62d1SSasha Levin 	compat__remove_message(compat_id);
33378a2a3e2SSasha Levin 
33478a2a3e2SSasha Levin 	queue		= &ndev->vqs[vq];
33578a2a3e2SSasha Levin 	queue->pfn	= pfn;
33678a2a3e2SSasha Levin 	p		= guest_pfn_to_host(kvm, queue->pfn);
33778a2a3e2SSasha Levin 
3380f4f51a8SAsias He 	/* FIXME: respect pci and mmio vring alignment */
33978a2a3e2SSasha Levin 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
34078a2a3e2SSasha Levin 
3419ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
34278a2a3e2SSasha Levin 		return 0;
3439ed67cdcSSasha Levin 
3449ed67cdcSSasha Levin 	state.num = queue->vring.num;
3459ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
3469ed67cdcSSasha Levin 	if (r < 0)
3479ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_NUM failed");
3489ed67cdcSSasha Levin 	state.num = 0;
3499ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
3509ed67cdcSSasha Levin 	if (r < 0)
3519ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_BASE failed");
3529ed67cdcSSasha Levin 
3539ed67cdcSSasha Levin 	addr = (struct vhost_vring_addr) {
3549ed67cdcSSasha Levin 		.index = vq,
3559ed67cdcSSasha Levin 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
3569ed67cdcSSasha Levin 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
3579ed67cdcSSasha Levin 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
3589ed67cdcSSasha Levin 	};
3599ed67cdcSSasha Levin 
3609ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
3619ed67cdcSSasha Levin 	if (r < 0)
3629ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_ADDR failed");
3639ed67cdcSSasha Levin 
3649ed67cdcSSasha Levin 	return 0;
3659ed67cdcSSasha Levin }
3669ed67cdcSSasha Levin 
3679ed67cdcSSasha Levin static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
3689ed67cdcSSasha Levin {
3699ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
3709ed67cdcSSasha Levin 	struct kvm_irqfd irq;
3719ed67cdcSSasha Levin 	struct vhost_vring_file file;
3729ed67cdcSSasha Levin 	int r;
3739ed67cdcSSasha Levin 
3749ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
3759ed67cdcSSasha Levin 		return;
3769ed67cdcSSasha Levin 
3779ed67cdcSSasha Levin 	irq = (struct kvm_irqfd) {
3789ed67cdcSSasha Levin 		.gsi	= gsi,
3799ed67cdcSSasha Levin 		.fd	= eventfd(0, 0),
3809ed67cdcSSasha Levin 	};
3819ed67cdcSSasha Levin 	file = (struct vhost_vring_file) {
3829ed67cdcSSasha Levin 		.index	= vq,
3839ed67cdcSSasha Levin 		.fd	= irq.fd,
3849ed67cdcSSasha Levin 	};
3859ed67cdcSSasha Levin 
3869ed67cdcSSasha Levin 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
3879ed67cdcSSasha Levin 	if (r < 0)
3889ed67cdcSSasha Levin 		die_perror("KVM_IRQFD failed");
3899ed67cdcSSasha Levin 
3909ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
3919ed67cdcSSasha Levin 	if (r < 0)
3929ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_CALL failed");
3939ed67cdcSSasha Levin 	file.fd = ndev->tap_fd;
3949ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
3959ed67cdcSSasha Levin 	if (r != 0)
3969ed67cdcSSasha Levin 		die("VHOST_NET_SET_BACKEND failed %d", errno);
3979ed67cdcSSasha Levin 
3989ed67cdcSSasha Levin }
3999ed67cdcSSasha Levin 
4009ed67cdcSSasha Levin static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
4019ed67cdcSSasha Levin {
4029ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
4039ed67cdcSSasha Levin 	struct vhost_vring_file file = {
4049ed67cdcSSasha Levin 		.index	= vq,
4059ed67cdcSSasha Levin 		.fd	= efd,
4069ed67cdcSSasha Levin 	};
4079ed67cdcSSasha Levin 	int r;
4089ed67cdcSSasha Levin 
4099ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
4109ed67cdcSSasha Levin 		return;
4119ed67cdcSSasha Levin 
4129ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
4139ed67cdcSSasha Levin 	if (r < 0)
4149ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_KICK failed");
41578a2a3e2SSasha Levin }
41678a2a3e2SSasha Levin 
41778a2a3e2SSasha Levin static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
41878a2a3e2SSasha Levin {
4199a6d73f1SSasha Levin 	struct net_dev *ndev = dev;
4209a6d73f1SSasha Levin 
4219a6d73f1SSasha Levin 	virtio_net_handle_callback(kvm, ndev, vq);
42278a2a3e2SSasha Levin 
42378a2a3e2SSasha Levin 	return 0;
42478a2a3e2SSasha Levin }
42578a2a3e2SSasha Levin 
42678a2a3e2SSasha Levin static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
42778a2a3e2SSasha Levin {
42878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
42978a2a3e2SSasha Levin 
43078a2a3e2SSasha Levin 	return ndev->vqs[vq].pfn;
43178a2a3e2SSasha Levin }
43278a2a3e2SSasha Levin 
/* virtio_ops hook: every queue reports the same fixed size, whatever @vq is. */
static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}
43878a2a3e2SSasha Levin 
/*
 * virtio_ops hook: the requested @size is echoed back unchanged; nothing is
 * stored or resized here.
 */
static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}
4440f4f51a8SAsias He 
4451c47ce69SSasha Levin static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
4461c47ce69SSasha Levin 	.get_config		= get_config,
4471c47ce69SSasha Levin 	.get_host_features	= get_host_features,
4481c47ce69SSasha Levin 	.set_guest_features	= set_guest_features,
4491c47ce69SSasha Levin 	.init_vq		= init_vq,
4501c47ce69SSasha Levin 	.get_pfn_vq		= get_pfn_vq,
4511c47ce69SSasha Levin 	.get_size_vq		= get_size_vq,
4520f4f51a8SAsias He 	.set_size_vq		= set_size_vq,
4530f4f51a8SAsias He 	.notify_vq		= notify_vq,
4549ed67cdcSSasha Levin 	.notify_vq_gsi		= notify_vq_gsi,
4559ed67cdcSSasha Levin 	.notify_vq_eventfd	= notify_vq_eventfd,
4561c47ce69SSasha Levin };
4571c47ce69SSasha Levin 
4589ed67cdcSSasha Levin static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
4599ed67cdcSSasha Levin {
46092c1f37bSSasha Levin 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
4619ed67cdcSSasha Levin 	struct vhost_memory *mem;
4629ed67cdcSSasha Levin 	int r;
4639ed67cdcSSasha Levin 
4649ed67cdcSSasha Levin 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
4659ed67cdcSSasha Levin 	if (ndev->vhost_fd < 0)
4669ed67cdcSSasha Levin 		die_perror("Failed openning vhost-net device");
4679ed67cdcSSasha Levin 
4688b795457SAsias He 	mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
4699ed67cdcSSasha Levin 	if (mem == NULL)
4709ed67cdcSSasha Levin 		die("Failed allocating memory for vhost memory map");
4719ed67cdcSSasha Levin 
4729ed67cdcSSasha Levin 	mem->nregions = 1;
4739ed67cdcSSasha Levin 	mem->regions[0] = (struct vhost_memory_region) {
4749ed67cdcSSasha Levin 		.guest_phys_addr	= 0,
4759ed67cdcSSasha Levin 		.memory_size		= kvm->ram_size,
476b4108023SAsias He 		.userspace_addr		= (unsigned long)kvm->ram_start,
4779ed67cdcSSasha Levin 	};
4789ed67cdcSSasha Levin 
4799ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
4809ed67cdcSSasha Levin 	if (r != 0)
4819ed67cdcSSasha Levin 		die_perror("VHOST_SET_OWNER failed");
4829ed67cdcSSasha Levin 
4839ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
4849ed67cdcSSasha Levin 	if (r != 0)
4859ed67cdcSSasha Levin 		die_perror("VHOST_SET_FEATURES failed");
4869ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
4879ed67cdcSSasha Levin 	if (r != 0)
4889ed67cdcSSasha Levin 		die_perror("VHOST_SET_MEM_TABLE failed");
489627d6874SAsias He 
490627d6874SAsias He 	ndev->vdev.use_vhost = true;
491627d6874SAsias He 
4929ed67cdcSSasha Levin 	free(mem);
4939ed67cdcSSasha Levin }
4949ed67cdcSSasha Levin 
/*
 * Parse a textual MAC address "xx:xx:xx:xx:xx:xx" (hex octets) from @str into
 * the six bytes at @mac.
 *
 * @mac is written only when all six octets parse. On malformed or NULL input
 * it is left untouched, so a caller's pre-set default survives a bad string.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	/* %hhx requires unsigned char targets; parse into scratch space first. */
	unsigned char octets[6];
	int parsed;

	if (str == NULL || mac == NULL)
		return;

	parsed = sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
			&octets[0], &octets[1], &octets[2],
			&octets[3], &octets[4], &octets[5]);
	if (parsed != 6)
		return;

	memcpy(mac, octets, sizeof(octets));
}
5004346fd8fSSasha Levin static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
5014346fd8fSSasha Levin 			const char *param, const char *val)
5025f225124SSasha Levin {
5035f225124SSasha Levin 	if (strcmp(param, "guest_mac") == 0) {
5045f225124SSasha Levin 		str_to_mac(val, p->guest_mac);
5055f225124SSasha Levin 	} else if (strcmp(param, "mode") == 0) {
5065f225124SSasha Levin 		if (!strncmp(val, "user", 4)) {
5075f225124SSasha Levin 			int i;
5085f225124SSasha Levin 
5095f225124SSasha Levin 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
5105f225124SSasha Levin 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
5115f225124SSasha Levin 					die("Only one usermode network device allowed at a time");
5125f225124SSasha Levin 			p->mode = NET_MODE_USER;
5135f225124SSasha Levin 		} else if (!strncmp(val, "tap", 3)) {
5145f225124SSasha Levin 			p->mode = NET_MODE_TAP;
5155f225124SSasha Levin 		} else if (!strncmp(val, "none", 4)) {
5165f225124SSasha Levin 			kvm->cfg.no_net = 1;
5175f225124SSasha Levin 			return -1;
5185f225124SSasha Levin 		} else
5195f225124SSasha Levin 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
5205f225124SSasha Levin 	} else if (strcmp(param, "script") == 0) {
5215f225124SSasha Levin 		p->script = strdup(val);
5225f225124SSasha Levin 	} else if (strcmp(param, "guest_ip") == 0) {
5235f225124SSasha Levin 		p->guest_ip = strdup(val);
5245f225124SSasha Levin 	} else if (strcmp(param, "host_ip") == 0) {
5255f225124SSasha Levin 		p->host_ip = strdup(val);
5265f225124SSasha Levin 	} else if (strcmp(param, "trans") == 0) {
5275f225124SSasha Levin 		p->trans = strdup(val);
5285f225124SSasha Levin 	} else if (strcmp(param, "vhost") == 0) {
5295f225124SSasha Levin 		p->vhost = atoi(val);
5305f225124SSasha Levin 	} else if (strcmp(param, "fd") == 0) {
5315f225124SSasha Levin 		p->fd = atoi(val);
5325f225124SSasha Levin 	} else
5335f225124SSasha Levin 		die("Unknown network parameter %s", param);
5345f225124SSasha Levin 
5355f225124SSasha Levin 	return 0;
5365f225124SSasha Levin }
5375f225124SSasha Levin 
5385f225124SSasha Levin int netdev_parser(const struct option *opt, const char *arg, int unset)
5395f225124SSasha Levin {
5405f225124SSasha Levin 	struct virtio_net_params p;
5415f225124SSasha Levin 	char *buf = NULL, *cmd = NULL, *cur = NULL;
5425f225124SSasha Levin 	bool on_cmd = true;
5435f225124SSasha Levin 	struct kvm *kvm = opt->ptr;
5445f225124SSasha Levin 
5455f225124SSasha Levin 	if (arg) {
5465f225124SSasha Levin 		buf = strdup(arg);
5475f225124SSasha Levin 		if (buf == NULL)
5485f225124SSasha Levin 			die("Failed allocating new net buffer");
5495f225124SSasha Levin 		cur = strtok(buf, ",=");
5505f225124SSasha Levin 	}
5515f225124SSasha Levin 
5525f225124SSasha Levin 	p = (struct virtio_net_params) {
5535f225124SSasha Levin 		.guest_ip	= DEFAULT_GUEST_ADDR,
5545f225124SSasha Levin 		.host_ip	= DEFAULT_HOST_ADDR,
5555f225124SSasha Levin 		.script		= DEFAULT_SCRIPT,
5565f225124SSasha Levin 		.mode		= NET_MODE_TAP,
5575f225124SSasha Levin 	};
5585f225124SSasha Levin 
5595f225124SSasha Levin 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
5605f225124SSasha Levin 	p.guest_mac[5] += kvm->cfg.num_net_devices;
5615f225124SSasha Levin 
5625f225124SSasha Levin 	while (cur) {
5635f225124SSasha Levin 		if (on_cmd) {
5645f225124SSasha Levin 			cmd = cur;
5655f225124SSasha Levin 		} else {
5664346fd8fSSasha Levin 			if (set_net_param(kvm, &p, cmd, cur) < 0)
5675f225124SSasha Levin 				goto done;
5685f225124SSasha Levin 		}
5695f225124SSasha Levin 		on_cmd = !on_cmd;
5705f225124SSasha Levin 
5715f225124SSasha Levin 		cur = strtok(NULL, ",=");
5725f225124SSasha Levin 	};
5735f225124SSasha Levin 
5745f225124SSasha Levin 	kvm->cfg.num_net_devices++;
5755f225124SSasha Levin 
5765f225124SSasha Levin 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
5775f225124SSasha Levin 	if (kvm->cfg.net_params == NULL)
5785f225124SSasha Levin 		die("Failed adding new network device");
5795f225124SSasha Levin 
5805f225124SSasha Levin 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
5815f225124SSasha Levin 
5825f225124SSasha Levin done:
5835f225124SSasha Levin 	free(buf);
5845f225124SSasha Levin 	return 0;
5855f225124SSasha Levin }
5865f225124SSasha Levin 
5875f225124SSasha Levin static int virtio_net__init_one(struct virtio_net_params *params)
5884f56d42cSAsias He {
589b5ee1ea7SAsias He 	int i;
5909a6d73f1SSasha Levin 	struct net_dev *ndev;
5919a6d73f1SSasha Levin 
5929a6d73f1SSasha Levin 	ndev = calloc(1, sizeof(struct net_dev));
5939a6d73f1SSasha Levin 	if (ndev == NULL)
5945f225124SSasha Levin 		return -ENOMEM;
5959a6d73f1SSasha Levin 
5969a6d73f1SSasha Levin 	list_add_tail(&ndev->list, &ndevs);
5979a6d73f1SSasha Levin 
5989a6d73f1SSasha Levin 	ndev->kvm = params->kvm;
5999a6d73f1SSasha Levin 
6009a6d73f1SSasha Levin 	mutex_init(&ndev->mutex);
6019a6d73f1SSasha Levin 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
6022449f6e3SSasha Levin 
6030c54698eSAsias He 	for (i = 0 ; i < 6 ; i++) {
6049a6d73f1SSasha Levin 		ndev->config.mac[i]		= params->guest_mac[i];
6059a6d73f1SSasha Levin 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
6069a6d73f1SSasha Levin 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
6070c54698eSAsias He 	}
608f715177dSAsias He 
6099a6d73f1SSasha Levin 	ndev->mode = params->mode;
6109a6d73f1SSasha Levin 	if (ndev->mode == NET_MODE_TAP) {
6119a6d73f1SSasha Levin 		if (!virtio_net__tap_init(params, ndev))
612db051127SMichael Ellerman 			die_perror("You have requested a TAP device, but creation of one has failed because");
6139a6d73f1SSasha Levin 		ndev->ops = &tap_ops;
614b4fdde6dSAsias He 	} else {
6159a6d73f1SSasha Levin 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
6169a6d73f1SSasha Levin 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
6179a6d73f1SSasha Levin 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
6189a6d73f1SSasha Levin 		ndev->info.buf_nr		= 20,
6199a6d73f1SSasha Levin 		uip_init(&ndev->info);
6209a6d73f1SSasha Levin 		ndev->ops = &uip_ops;
621b4fdde6dSAsias He 	}
622b5ee1ea7SAsias He 
62369205aa1SAsias He 	if (params->trans && strcmp(params->trans, "mmio") == 0)
6244346fd8fSSasha Levin 		virtio_init(params->kvm, ndev, &ndev->vdev, &net_dev_virtio_ops,
62569205aa1SAsias He 			    VIRTIO_MMIO, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
62669205aa1SAsias He 	else
6274346fd8fSSasha Levin 		virtio_init(params->kvm, ndev, &ndev->vdev, &net_dev_virtio_ops,
62802eca50cSAsias He 			    VIRTIO_PCI, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
62927ab67f5SSasha Levin 
6309ed67cdcSSasha Levin 	if (params->vhost)
6319ed67cdcSSasha Levin 		virtio_net__vhost_init(params->kvm, ndev);
6329ed67cdcSSasha Levin 	else
6339a6d73f1SSasha Levin 		virtio_net__io_thread_init(params->kvm, ndev);
634cb83de6fSSasha Levin 
635d278197dSAsias He 	if (compat_id == -1)
63652f34d2cSAsias He 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
6375f225124SSasha Levin 
6385f225124SSasha Levin 	return 0;
6395f225124SSasha Levin }
6405f225124SSasha Levin 
6415f225124SSasha Levin int virtio_net__init(struct kvm *kvm)
6425f225124SSasha Levin {
6435f225124SSasha Levin 	int i;
6445f225124SSasha Levin 
6455f225124SSasha Levin 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
6465f225124SSasha Levin 		kvm->cfg.net_params[i].kvm = kvm;
6475f225124SSasha Levin 		virtio_net__init_one(&kvm->cfg.net_params[i]);
6485f225124SSasha Levin 	}
6495f225124SSasha Levin 
6505f225124SSasha Levin 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
6515f225124SSasha Levin 		struct virtio_net_params net_params;
6525f225124SSasha Levin 
6535f225124SSasha Levin 		net_params = (struct virtio_net_params) {
6545f225124SSasha Levin 			.guest_ip	= kvm->cfg.guest_ip,
6555f225124SSasha Levin 			.host_ip	= kvm->cfg.host_ip,
6565f225124SSasha Levin 			.kvm		= kvm,
6575f225124SSasha Levin 			.script		= kvm->cfg.script,
6585f225124SSasha Levin 			.mode		= NET_MODE_USER,
6595f225124SSasha Levin 		};
6605f225124SSasha Levin 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
6615f225124SSasha Levin 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
6625f225124SSasha Levin 
6635f225124SSasha Levin 		virtio_net__init_one(&net_params);
6645f225124SSasha Levin 	}
6655f225124SSasha Levin 
6665f225124SSasha Levin 	return 0;
6675f225124SSasha Levin }
66849a8afd1SSasha Levin virtio_dev_init(virtio_net__init);
6695f225124SSasha Levin 
/*
 * Exit hook registered through virtio_dev_exit(). It currently does nothing
 * and reports success; devices on ndevs are not torn down here.
 */
int virtio_net__exit(struct kvm *kvm)
{
	return 0;
}
virtio_dev_exit(virtio_net__exit);
675