xref: /kvmtool/virtio/net.c (revision abd4a801d7586346c7637c2080fe9ccdecc7bbc7)
131638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
2b5ee1ea7SAsias He #include "kvm/virtio-net.h"
34f56d42cSAsias He #include "kvm/virtio.h"
44f56d42cSAsias He #include "kvm/types.h"
54f56d42cSAsias He #include "kvm/mutex.h"
64f56d42cSAsias He #include "kvm/util.h"
74f56d42cSAsias He #include "kvm/kvm.h"
82449f6e3SSasha Levin #include "kvm/irq.h"
9b5ee1ea7SAsias He #include "kvm/uip.h"
10cb83de6fSSasha Levin #include "kvm/guest_compat.h"
114f56d42cSAsias He 
129ed67cdcSSasha Levin #include <linux/vhost.h>
134f56d42cSAsias He #include <linux/virtio_net.h>
144f56d42cSAsias He #include <linux/if_tun.h>
1578a2a3e2SSasha Levin #include <linux/types.h>
16c229370aSIngo Molnar 
17c229370aSIngo Molnar #include <arpa/inet.h>
184f56d42cSAsias He #include <net/if.h>
19c229370aSIngo Molnar 
20c229370aSIngo Molnar #include <unistd.h>
214f56d42cSAsias He #include <fcntl.h>
22c229370aSIngo Molnar 
23cb7202c1SSasha Levin #include <sys/socket.h>
24c229370aSIngo Molnar #include <sys/ioctl.h>
25c229370aSIngo Molnar #include <sys/types.h>
2673b7d038SAmos Kong #include <sys/wait.h>
279ed67cdcSSasha Levin #include <sys/eventfd.h>
284f56d42cSAsias He 
/* Number of descriptors in every virtqueue ring of the device. */
#define VIRTIO_NET_QUEUE_SIZE		256

/* Indices into net_dev::vqs. */
#define VIRTIO_NET_RX_QUEUE		0
#define VIRTIO_NET_TX_QUEUE		1
#define VIRTIO_NET_CTRL_QUEUE		2

/* Total number of virtqueues: rx, tx and control. */
#define VIRTIO_NET_NUM_QUEUES		3
344f56d42cSAsias He 
35b4fdde6dSAsias He struct net_dev;
36b4fdde6dSAsias He 
37b4fdde6dSAsias He struct net_dev_operations {
38b4fdde6dSAsias He 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
39b4fdde6dSAsias He 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
40b4fdde6dSAsias He };
41b4fdde6dSAsias He 
428626798bSAsias He struct net_dev {
43d3476f7dSSasha Levin 	struct mutex			mutex;
4402eca50cSAsias He 	struct virtio_device		vdev;
459a6d73f1SSasha Levin 	struct list_head		list;
464f56d42cSAsias He 
474f56d42cSAsias He 	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];
48c229370aSIngo Molnar 	struct virtio_net_config	config;
4978a2a3e2SSasha Levin 	u32				features;
504f56d42cSAsias He 
51c4aa7c02SPekka Enberg 	pthread_t			io_rx_thread;
52d3476f7dSSasha Levin 	struct mutex			io_rx_lock;
53c4aa7c02SPekka Enberg 	pthread_cond_t			io_rx_cond;
54c4aa7c02SPekka Enberg 
55c4aa7c02SPekka Enberg 	pthread_t			io_tx_thread;
56d3476f7dSSasha Levin 	struct mutex			io_tx_lock;
57c4aa7c02SPekka Enberg 	pthread_cond_t			io_tx_cond;
58c4aa7c02SPekka Enberg 
599ed67cdcSSasha Levin 	int				vhost_fd;
604f56d42cSAsias He 	int				tap_fd;
614f56d42cSAsias He 	char				tap_name[IFNAMSIZ];
62bb1a32f1SAsias He 
63bb1a32f1SAsias He 	int				mode;
64bb1a32f1SAsias He 
65b5ee1ea7SAsias He 	struct uip_info			info;
66b4fdde6dSAsias He 	struct net_dev_operations	*ops;
679a6d73f1SSasha Levin 	struct kvm			*kvm;
684f56d42cSAsias He };
694f56d42cSAsias He 
/* Id from virtio_compat_add_message(); stays -1 until the first device is set up. */
static int compat_id = -1;
/* Every net_dev created by virtio_net__init_one() is appended here. */
static LIST_HEAD(ndevs);
724f56d42cSAsias He 
73c4aa7c02SPekka Enberg static void *virtio_net_rx_thread(void *p)
744f56d42cSAsias He {
754f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
764f56d42cSAsias He 	struct virt_queue *vq;
7743835ac9SSasha Levin 	struct kvm *kvm;
789a6d73f1SSasha Levin 	struct net_dev *ndev = p;
793fdf659dSSasha Levin 	u16 out, in;
803fdf659dSSasha Levin 	u16 head;
814f56d42cSAsias He 	int len;
824f56d42cSAsias He 
83a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-rx");
84a4d8c55eSSasha Levin 
859a6d73f1SSasha Levin 	kvm = ndev->kvm;
869a6d73f1SSasha Levin 	vq = &ndev->vqs[VIRTIO_NET_RX_QUEUE];
87c4aa7c02SPekka Enberg 
88c4aa7c02SPekka Enberg 	while (1) {
899a6d73f1SSasha Levin 		mutex_lock(&ndev->io_rx_lock);
90c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
91d3476f7dSSasha Levin 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock.mutex);
929a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_rx_lock);
934f56d42cSAsias He 
944f56d42cSAsias He 		while (virt_queue__available(vq)) {
9543835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
969a6d73f1SSasha Levin 			len = ndev->ops->rx(iov, in, ndev);
97246c8347SAsias He 			virt_queue__set_used_elem(vq, head, len);
987f5ffaf5SAsias He 
99c4aa7c02SPekka Enberg 			/* We should interrupt guest right now, otherwise latency is huge. */
10092c1f37bSSasha Levin 			if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE]))
10102eca50cSAsias He 				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev,
10292c1f37bSSasha Levin 							   VIRTIO_NET_RX_QUEUE);
1034f56d42cSAsias He 		}
104c4aa7c02SPekka Enberg 	}
105c4aa7c02SPekka Enberg 
106c4aa7c02SPekka Enberg 	pthread_exit(NULL);
107c4aa7c02SPekka Enberg 	return NULL;
108c4aa7c02SPekka Enberg 
109c4aa7c02SPekka Enberg }
110c4aa7c02SPekka Enberg 
111c4aa7c02SPekka Enberg static void *virtio_net_tx_thread(void *p)
1124f56d42cSAsias He {
1134f56d42cSAsias He 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
1144f56d42cSAsias He 	struct virt_queue *vq;
11543835ac9SSasha Levin 	struct kvm *kvm;
1169a6d73f1SSasha Levin 	struct net_dev *ndev = p;
1173fdf659dSSasha Levin 	u16 out, in;
1183fdf659dSSasha Levin 	u16 head;
1194f56d42cSAsias He 	int len;
1204f56d42cSAsias He 
121a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-net-tx");
122a4d8c55eSSasha Levin 
1239a6d73f1SSasha Levin 	kvm = ndev->kvm;
1249a6d73f1SSasha Levin 	vq = &ndev->vqs[VIRTIO_NET_TX_QUEUE];
125c4aa7c02SPekka Enberg 
126c4aa7c02SPekka Enberg 	while (1) {
1279a6d73f1SSasha Levin 		mutex_lock(&ndev->io_tx_lock);
128c4aa7c02SPekka Enberg 		if (!virt_queue__available(vq))
129d3476f7dSSasha Levin 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock.mutex);
1309a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_tx_lock);
1314f56d42cSAsias He 
1324f56d42cSAsias He 		while (virt_queue__available(vq)) {
13343835ac9SSasha Levin 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
1349a6d73f1SSasha Levin 			len = ndev->ops->tx(iov, out, ndev);
1354f56d42cSAsias He 			virt_queue__set_used_elem(vq, head, len);
1364f56d42cSAsias He 		}
1374f56d42cSAsias He 
13892c1f37bSSasha Levin 		if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE]))
13902eca50cSAsias He 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, VIRTIO_NET_TX_QUEUE);
1404f56d42cSAsias He 	}
1414f56d42cSAsias He 
142c4aa7c02SPekka Enberg 	pthread_exit(NULL);
143407475bfSPekka Enberg 
144c4aa7c02SPekka Enberg 	return NULL;
145c4aa7c02SPekka Enberg 
146c4aa7c02SPekka Enberg }
147407475bfSPekka Enberg 
148*abd4a801SSasha Levin static void virtio_net_handle_ctrl(struct kvm *kvm, struct net_dev *ndev)
149*abd4a801SSasha Levin {
150*abd4a801SSasha Levin 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
151*abd4a801SSasha Levin 	u16 out, in, head;
152*abd4a801SSasha Levin 	struct virtio_net_ctrl_hdr *ctrl;
153*abd4a801SSasha Levin 	virtio_net_ctrl_ack *ack;
154*abd4a801SSasha Levin 
155*abd4a801SSasha Levin 	head = virt_queue__get_iov(&ndev->vqs[VIRTIO_NET_CTRL_QUEUE], iov, &out, &in, kvm);
156*abd4a801SSasha Levin 	ctrl = iov[0].iov_base;
157*abd4a801SSasha Levin 	ack = iov[out].iov_base;
158*abd4a801SSasha Levin 
159*abd4a801SSasha Levin 	switch (ctrl->class) {
160*abd4a801SSasha Levin 	default:
161*abd4a801SSasha Levin 		*ack = VIRTIO_NET_ERR;
162*abd4a801SSasha Levin 		break;
163*abd4a801SSasha Levin 	}
164*abd4a801SSasha Levin 
165*abd4a801SSasha Levin 	virt_queue__set_used_elem(&ndev->vqs[VIRTIO_NET_CTRL_QUEUE], head, iov[out].iov_len);
166*abd4a801SSasha Levin 
167*abd4a801SSasha Levin 	if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_CTRL_QUEUE]))
168*abd4a801SSasha Levin 		ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, VIRTIO_NET_CTRL_QUEUE);
169*abd4a801SSasha Levin 
170*abd4a801SSasha Levin 	return;
171*abd4a801SSasha Levin }
172*abd4a801SSasha Levin 
1739a6d73f1SSasha Levin static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
1744f56d42cSAsias He {
1759a6d73f1SSasha Levin 	switch (queue) {
176b5ee1ea7SAsias He 	case VIRTIO_NET_TX_QUEUE:
1779a6d73f1SSasha Levin 		mutex_lock(&ndev->io_tx_lock);
1789a6d73f1SSasha Levin 		pthread_cond_signal(&ndev->io_tx_cond);
1799a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_tx_lock);
180407475bfSPekka Enberg 		break;
181b5ee1ea7SAsias He 	case VIRTIO_NET_RX_QUEUE:
1829a6d73f1SSasha Levin 		mutex_lock(&ndev->io_rx_lock);
1839a6d73f1SSasha Levin 		pthread_cond_signal(&ndev->io_rx_cond);
1849a6d73f1SSasha Levin 		mutex_unlock(&ndev->io_rx_lock);
185407475bfSPekka Enberg 		break;
186*abd4a801SSasha Levin 	case VIRTIO_NET_CTRL_QUEUE:
187*abd4a801SSasha Levin 		virtio_net_handle_ctrl(kvm, ndev);
188*abd4a801SSasha Levin 		break;
189407475bfSPekka Enberg 	default:
1909a6d73f1SSasha Levin 		pr_warning("Unknown queue index %u", queue);
191c4aa7c02SPekka Enberg 	}
1924f56d42cSAsias He }
1934f56d42cSAsias He 
1949a6d73f1SSasha Levin static bool virtio_net__tap_init(const struct virtio_net_params *params,
1959a6d73f1SSasha Levin 					struct net_dev *ndev)
1964f56d42cSAsias He {
197cb7202c1SSasha Levin 	int sock = socket(AF_INET, SOCK_STREAM, 0);
198f715177dSAsias He 	int pid, status, offload, hdr_len;
199cb7202c1SSasha Levin 	struct sockaddr_in sin = {0};
200246c8347SAsias He 	struct ifreq ifr;
2014f56d42cSAsias He 
202f19edd1eSSasha Levin 	/* Did the user already gave us the FD? */
203f19edd1eSSasha Levin 	if (params->fd) {
204f19edd1eSSasha Levin 		ndev->tap_fd = params->fd;
205f19edd1eSSasha Levin 		return 1;
206f19edd1eSSasha Levin 	}
207f19edd1eSSasha Levin 
2089a6d73f1SSasha Levin 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
2099a6d73f1SSasha Levin 	if (ndev->tap_fd < 0) {
2104542f276SCyrill Gorcunov 		pr_warning("Unable to open /dev/net/tun");
2113b02f580SSasha Levin 		goto fail;
2123b02f580SSasha Levin 	}
2134f56d42cSAsias He 
2144f56d42cSAsias He 	memset(&ifr, 0, sizeof(ifr));
215246c8347SAsias He 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
2169a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
2174542f276SCyrill Gorcunov 		pr_warning("Config tap device error. Are you root?");
2183b02f580SSasha Levin 		goto fail;
2193b02f580SSasha Levin 	}
2204f56d42cSAsias He 
2219a6d73f1SSasha Levin 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
2224f56d42cSAsias He 
2239a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
2244542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETNOCSUM error");
225246c8347SAsias He 		goto fail;
226246c8347SAsias He 	}
227246c8347SAsias He 
228246c8347SAsias He 	hdr_len = sizeof(struct virtio_net_hdr);
2299a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
2304542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
231246c8347SAsias He 
232246c8347SAsias He 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
2339a6d73f1SSasha Levin 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
2344542f276SCyrill Gorcunov 		pr_warning("Config tap device TUNSETOFFLOAD error");
235246c8347SAsias He 		goto fail;
236246c8347SAsias He 	}
2374f56d42cSAsias He 
23873b7d038SAmos Kong 	if (strcmp(params->script, "none")) {
23973b7d038SAmos Kong 		pid = fork();
24073b7d038SAmos Kong 		if (pid == 0) {
2419a6d73f1SSasha Levin 			execl(params->script, params->script, ndev->tap_name, NULL);
24273b7d038SAmos Kong 			_exit(1);
24373b7d038SAmos Kong 		} else {
24473b7d038SAmos Kong 			waitpid(pid, &status, 0);
24573b7d038SAmos Kong 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
2464542f276SCyrill Gorcunov 				pr_warning("Fail to setup tap by %s", params->script);
24773b7d038SAmos Kong 				goto fail;
24873b7d038SAmos Kong 			}
24973b7d038SAmos Kong 		}
25073b7d038SAmos Kong 	} else {
251cb7202c1SSasha Levin 		memset(&ifr, 0, sizeof(ifr));
2529a6d73f1SSasha Levin 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
253bdfcfca6SSasha Levin 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
254cb7202c1SSasha Levin 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
255cb7202c1SSasha Levin 		ifr.ifr_addr.sa_family = AF_INET;
2563b02f580SSasha Levin 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
2574542f276SCyrill Gorcunov 			pr_warning("Could not set ip address on tap device");
2583b02f580SSasha Levin 			goto fail;
2593b02f580SSasha Levin 		}
26073b7d038SAmos Kong 	}
261cb7202c1SSasha Levin 
262cb7202c1SSasha Levin 	memset(&ifr, 0, sizeof(ifr));
2639a6d73f1SSasha Levin 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
264cb7202c1SSasha Levin 	ioctl(sock, SIOCGIFFLAGS, &ifr);
265cb7202c1SSasha Levin 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
266cb7202c1SSasha Levin 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
2674542f276SCyrill Gorcunov 		pr_warning("Could not bring tap device up");
268cb7202c1SSasha Levin 
269cb7202c1SSasha Levin 	close(sock);
2703b02f580SSasha Levin 
2713b02f580SSasha Levin 	return 1;
2723b02f580SSasha Levin 
2733b02f580SSasha Levin fail:
2743b02f580SSasha Levin 	if (sock >= 0)
2753b02f580SSasha Levin 		close(sock);
2769a6d73f1SSasha Levin 	if (ndev->tap_fd >= 0)
2779a6d73f1SSasha Levin 		close(ndev->tap_fd);
2783b02f580SSasha Levin 
2793b02f580SSasha Levin 	return 0;
2804f56d42cSAsias He }
2814f56d42cSAsias He 
2829a6d73f1SSasha Levin static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
283c4aa7c02SPekka Enberg {
284d3476f7dSSasha Levin 	mutex_init(&ndev->io_tx_lock);
285d3476f7dSSasha Levin 	mutex_init(&ndev->io_rx_lock);
286c4aa7c02SPekka Enberg 
2879a6d73f1SSasha Levin 	pthread_cond_init(&ndev->io_tx_cond, NULL);
28806e5512fSAsias He 	pthread_cond_init(&ndev->io_rx_cond, NULL);
289c4aa7c02SPekka Enberg 
2909a6d73f1SSasha Levin 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
29106e5512fSAsias He 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
292c4aa7c02SPekka Enberg }
293c4aa7c02SPekka Enberg 
294b4fdde6dSAsias He static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
295b4fdde6dSAsias He {
296b4fdde6dSAsias He 	return writev(ndev->tap_fd, iov, out);
297b4fdde6dSAsias He }
298b4fdde6dSAsias He 
299b4fdde6dSAsias He static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
300b4fdde6dSAsias He {
301b4fdde6dSAsias He 	return readv(ndev->tap_fd, iov, in);
302b4fdde6dSAsias He }
303b4fdde6dSAsias He 
304b4fdde6dSAsias He static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
305b4fdde6dSAsias He {
306b4fdde6dSAsias He 	return uip_tx(iov, out, &ndev->info);
307b4fdde6dSAsias He }
308b4fdde6dSAsias He 
309b4fdde6dSAsias He static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
310b4fdde6dSAsias He {
311b4fdde6dSAsias He 	return uip_rx(iov, in, &ndev->info);
312b4fdde6dSAsias He }
313b4fdde6dSAsias He 
314b4fdde6dSAsias He static struct net_dev_operations tap_ops = {
315b4fdde6dSAsias He 	.rx	= tap_ops_rx,
316b4fdde6dSAsias He 	.tx	= tap_ops_tx,
317b4fdde6dSAsias He };
318b4fdde6dSAsias He 
319b4fdde6dSAsias He static struct net_dev_operations uip_ops = {
320b4fdde6dSAsias He 	.rx	= uip_ops_rx,
321b4fdde6dSAsias He 	.tx	= uip_ops_tx,
322b4fdde6dSAsias He };
323b4fdde6dSAsias He 
324c5ae742bSSasha Levin static u8 *get_config(struct kvm *kvm, void *dev)
32578a2a3e2SSasha Levin {
32678a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
32778a2a3e2SSasha Levin 
328c5ae742bSSasha Levin 	return ((u8 *)(&ndev->config));
32978a2a3e2SSasha Levin }
33078a2a3e2SSasha Levin 
33178a2a3e2SSasha Levin static u32 get_host_features(struct kvm *kvm, void *dev)
33278a2a3e2SSasha Levin {
33378a2a3e2SSasha Levin 	return 1UL << VIRTIO_NET_F_MAC
33478a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_CSUM
33578a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_UFO
33678a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO4
33778a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_HOST_TSO6
33878a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_UFO
33978a2a3e2SSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
34092c1f37bSSasha Levin 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
341754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_EVENT_IDX
342*abd4a801SSasha Levin 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
343*abd4a801SSasha Levin 		| 1UL << VIRTIO_NET_F_CTRL_VQ;
34478a2a3e2SSasha Levin }
34578a2a3e2SSasha Levin 
34678a2a3e2SSasha Levin static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
34778a2a3e2SSasha Levin {
34878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
34978a2a3e2SSasha Levin 
35078a2a3e2SSasha Levin 	ndev->features = features;
35178a2a3e2SSasha Levin }
35278a2a3e2SSasha Levin 
353c59ba304SWill Deacon static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
354c59ba304SWill Deacon 		   u32 pfn)
35578a2a3e2SSasha Levin {
3569ed67cdcSSasha Levin 	struct vhost_vring_state state = { .index = vq };
3579ed67cdcSSasha Levin 	struct vhost_vring_addr addr;
35878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
35978a2a3e2SSasha Levin 	struct virt_queue *queue;
36078a2a3e2SSasha Levin 	void *p;
3619ed67cdcSSasha Levin 	int r;
36278a2a3e2SSasha Levin 
363312c62d1SSasha Levin 	compat__remove_message(compat_id);
36478a2a3e2SSasha Levin 
36578a2a3e2SSasha Levin 	queue		= &ndev->vqs[vq];
36678a2a3e2SSasha Levin 	queue->pfn	= pfn;
367c59ba304SWill Deacon 	p		= guest_flat_to_host(kvm, queue->pfn * page_size);
36878a2a3e2SSasha Levin 
369c59ba304SWill Deacon 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
37078a2a3e2SSasha Levin 
3719ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
37278a2a3e2SSasha Levin 		return 0;
3739ed67cdcSSasha Levin 
3749ed67cdcSSasha Levin 	state.num = queue->vring.num;
3759ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
3769ed67cdcSSasha Levin 	if (r < 0)
3779ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_NUM failed");
3789ed67cdcSSasha Levin 	state.num = 0;
3799ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
3809ed67cdcSSasha Levin 	if (r < 0)
3819ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_BASE failed");
3829ed67cdcSSasha Levin 
3839ed67cdcSSasha Levin 	addr = (struct vhost_vring_addr) {
3849ed67cdcSSasha Levin 		.index = vq,
3859ed67cdcSSasha Levin 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
3869ed67cdcSSasha Levin 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
3879ed67cdcSSasha Levin 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
3889ed67cdcSSasha Levin 	};
3899ed67cdcSSasha Levin 
3909ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
3919ed67cdcSSasha Levin 	if (r < 0)
3929ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_ADDR failed");
3939ed67cdcSSasha Levin 
3949ed67cdcSSasha Levin 	return 0;
3959ed67cdcSSasha Levin }
3969ed67cdcSSasha Levin 
3979ed67cdcSSasha Levin static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
3989ed67cdcSSasha Levin {
3999ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
4009ed67cdcSSasha Levin 	struct kvm_irqfd irq;
4019ed67cdcSSasha Levin 	struct vhost_vring_file file;
4029ed67cdcSSasha Levin 	int r;
4039ed67cdcSSasha Levin 
4049ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
4059ed67cdcSSasha Levin 		return;
4069ed67cdcSSasha Levin 
4079ed67cdcSSasha Levin 	irq = (struct kvm_irqfd) {
4089ed67cdcSSasha Levin 		.gsi	= gsi,
4099ed67cdcSSasha Levin 		.fd	= eventfd(0, 0),
4109ed67cdcSSasha Levin 	};
4119ed67cdcSSasha Levin 	file = (struct vhost_vring_file) {
4129ed67cdcSSasha Levin 		.index	= vq,
4139ed67cdcSSasha Levin 		.fd	= irq.fd,
4149ed67cdcSSasha Levin 	};
4159ed67cdcSSasha Levin 
4169ed67cdcSSasha Levin 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
4179ed67cdcSSasha Levin 	if (r < 0)
4189ed67cdcSSasha Levin 		die_perror("KVM_IRQFD failed");
4199ed67cdcSSasha Levin 
4209ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
4219ed67cdcSSasha Levin 	if (r < 0)
4229ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_CALL failed");
4239ed67cdcSSasha Levin 	file.fd = ndev->tap_fd;
4249ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
4259ed67cdcSSasha Levin 	if (r != 0)
4269ed67cdcSSasha Levin 		die("VHOST_NET_SET_BACKEND failed %d", errno);
4279ed67cdcSSasha Levin 
4289ed67cdcSSasha Levin }
4299ed67cdcSSasha Levin 
4309ed67cdcSSasha Levin static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
4319ed67cdcSSasha Levin {
4329ed67cdcSSasha Levin 	struct net_dev *ndev = dev;
4339ed67cdcSSasha Levin 	struct vhost_vring_file file = {
4349ed67cdcSSasha Levin 		.index	= vq,
4359ed67cdcSSasha Levin 		.fd	= efd,
4369ed67cdcSSasha Levin 	};
4379ed67cdcSSasha Levin 	int r;
4389ed67cdcSSasha Levin 
4399ed67cdcSSasha Levin 	if (ndev->vhost_fd == 0)
4409ed67cdcSSasha Levin 		return;
4419ed67cdcSSasha Levin 
4429ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
4439ed67cdcSSasha Levin 	if (r < 0)
4449ed67cdcSSasha Levin 		die_perror("VHOST_SET_VRING_KICK failed");
44578a2a3e2SSasha Levin }
44678a2a3e2SSasha Levin 
44778a2a3e2SSasha Levin static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
44878a2a3e2SSasha Levin {
4499a6d73f1SSasha Levin 	struct net_dev *ndev = dev;
4509a6d73f1SSasha Levin 
4519a6d73f1SSasha Levin 	virtio_net_handle_callback(kvm, ndev, vq);
45278a2a3e2SSasha Levin 
45378a2a3e2SSasha Levin 	return 0;
45478a2a3e2SSasha Levin }
45578a2a3e2SSasha Levin 
45678a2a3e2SSasha Levin static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
45778a2a3e2SSasha Levin {
45878a2a3e2SSasha Levin 	struct net_dev *ndev = dev;
45978a2a3e2SSasha Levin 
46078a2a3e2SSasha Levin 	return ndev->vqs[vq].pfn;
46178a2a3e2SSasha Levin }
46278a2a3e2SSasha Levin 
46378a2a3e2SSasha Levin static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
46478a2a3e2SSasha Levin {
4650f4f51a8SAsias He 	/* FIXME: dynamic */
46678a2a3e2SSasha Levin 	return VIRTIO_NET_QUEUE_SIZE;
46778a2a3e2SSasha Levin }
46878a2a3e2SSasha Levin 
4690f4f51a8SAsias He static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
4700f4f51a8SAsias He {
4710f4f51a8SAsias He 	/* FIXME: dynamic */
4720f4f51a8SAsias He 	return size;
4730f4f51a8SAsias He }
4740f4f51a8SAsias He 
4751c47ce69SSasha Levin static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
4761c47ce69SSasha Levin 	.get_config		= get_config,
4771c47ce69SSasha Levin 	.get_host_features	= get_host_features,
4781c47ce69SSasha Levin 	.set_guest_features	= set_guest_features,
4791c47ce69SSasha Levin 	.init_vq		= init_vq,
4801c47ce69SSasha Levin 	.get_pfn_vq		= get_pfn_vq,
4811c47ce69SSasha Levin 	.get_size_vq		= get_size_vq,
4820f4f51a8SAsias He 	.set_size_vq		= set_size_vq,
4830f4f51a8SAsias He 	.notify_vq		= notify_vq,
4849ed67cdcSSasha Levin 	.notify_vq_gsi		= notify_vq_gsi,
4859ed67cdcSSasha Levin 	.notify_vq_eventfd	= notify_vq_eventfd,
4861c47ce69SSasha Levin };
4871c47ce69SSasha Levin 
4889ed67cdcSSasha Levin static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
4899ed67cdcSSasha Levin {
49092c1f37bSSasha Levin 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
4919ed67cdcSSasha Levin 	struct vhost_memory *mem;
4929ed67cdcSSasha Levin 	int r;
4939ed67cdcSSasha Levin 
4949ed67cdcSSasha Levin 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
4959ed67cdcSSasha Levin 	if (ndev->vhost_fd < 0)
4969ed67cdcSSasha Levin 		die_perror("Failed openning vhost-net device");
4979ed67cdcSSasha Levin 
4988b795457SAsias He 	mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
4999ed67cdcSSasha Levin 	if (mem == NULL)
5009ed67cdcSSasha Levin 		die("Failed allocating memory for vhost memory map");
5019ed67cdcSSasha Levin 
5029ed67cdcSSasha Levin 	mem->nregions = 1;
5039ed67cdcSSasha Levin 	mem->regions[0] = (struct vhost_memory_region) {
5049ed67cdcSSasha Levin 		.guest_phys_addr	= 0,
5059ed67cdcSSasha Levin 		.memory_size		= kvm->ram_size,
506b4108023SAsias He 		.userspace_addr		= (unsigned long)kvm->ram_start,
5079ed67cdcSSasha Levin 	};
5089ed67cdcSSasha Levin 
5099ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
5109ed67cdcSSasha Levin 	if (r != 0)
5119ed67cdcSSasha Levin 		die_perror("VHOST_SET_OWNER failed");
5129ed67cdcSSasha Levin 
5139ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
5149ed67cdcSSasha Levin 	if (r != 0)
5159ed67cdcSSasha Levin 		die_perror("VHOST_SET_FEATURES failed");
5169ed67cdcSSasha Levin 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
5179ed67cdcSSasha Levin 	if (r != 0)
5189ed67cdcSSasha Levin 		die_perror("VHOST_SET_MEM_TABLE failed");
519627d6874SAsias He 
520627d6874SAsias He 	ndev->vdev.use_vhost = true;
521627d6874SAsias He 
5229ed67cdcSSasha Levin 	free(mem);
5239ed67cdcSSasha Levin }
5249ed67cdcSSasha Levin 
/*
 * Parse a MAC address of the form "xx:xx:xx:xx:xx:xx" (hex octets) into
 * the six bytes at 'mac'.
 *
 * The destination is written only when all six octets were parsed.  On
 * malformed input 'mac' is left untouched, so a caller's default address
 * is never half overwritten.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char octets[6];
	int i;

	if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		   &octets[0], &octets[1], &octets[2],
		   &octets[3], &octets[4], &octets[5]) != 6)
		return;

	for (i = 0; i < 6; i++)
		mac[i] = (char)octets[i];
}
5304346fd8fSSasha Levin static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
5314346fd8fSSasha Levin 			const char *param, const char *val)
5325f225124SSasha Levin {
5335f225124SSasha Levin 	if (strcmp(param, "guest_mac") == 0) {
5345f225124SSasha Levin 		str_to_mac(val, p->guest_mac);
5355f225124SSasha Levin 	} else if (strcmp(param, "mode") == 0) {
5365f225124SSasha Levin 		if (!strncmp(val, "user", 4)) {
5375f225124SSasha Levin 			int i;
5385f225124SSasha Levin 
5395f225124SSasha Levin 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
5405f225124SSasha Levin 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
5415f225124SSasha Levin 					die("Only one usermode network device allowed at a time");
5425f225124SSasha Levin 			p->mode = NET_MODE_USER;
5435f225124SSasha Levin 		} else if (!strncmp(val, "tap", 3)) {
5445f225124SSasha Levin 			p->mode = NET_MODE_TAP;
5455f225124SSasha Levin 		} else if (!strncmp(val, "none", 4)) {
5465f225124SSasha Levin 			kvm->cfg.no_net = 1;
5475f225124SSasha Levin 			return -1;
5485f225124SSasha Levin 		} else
5495f225124SSasha Levin 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
5505f225124SSasha Levin 	} else if (strcmp(param, "script") == 0) {
5515f225124SSasha Levin 		p->script = strdup(val);
5525f225124SSasha Levin 	} else if (strcmp(param, "guest_ip") == 0) {
5535f225124SSasha Levin 		p->guest_ip = strdup(val);
5545f225124SSasha Levin 	} else if (strcmp(param, "host_ip") == 0) {
5555f225124SSasha Levin 		p->host_ip = strdup(val);
5565f225124SSasha Levin 	} else if (strcmp(param, "trans") == 0) {
5575f225124SSasha Levin 		p->trans = strdup(val);
5585f225124SSasha Levin 	} else if (strcmp(param, "vhost") == 0) {
5595f225124SSasha Levin 		p->vhost = atoi(val);
5605f225124SSasha Levin 	} else if (strcmp(param, "fd") == 0) {
5615f225124SSasha Levin 		p->fd = atoi(val);
5625f225124SSasha Levin 	} else
5635f225124SSasha Levin 		die("Unknown network parameter %s", param);
5645f225124SSasha Levin 
5655f225124SSasha Levin 	return 0;
5665f225124SSasha Levin }
5675f225124SSasha Levin 
5685f225124SSasha Levin int netdev_parser(const struct option *opt, const char *arg, int unset)
5695f225124SSasha Levin {
5705f225124SSasha Levin 	struct virtio_net_params p;
5715f225124SSasha Levin 	char *buf = NULL, *cmd = NULL, *cur = NULL;
5725f225124SSasha Levin 	bool on_cmd = true;
5735f225124SSasha Levin 	struct kvm *kvm = opt->ptr;
5745f225124SSasha Levin 
5755f225124SSasha Levin 	if (arg) {
5765f225124SSasha Levin 		buf = strdup(arg);
5775f225124SSasha Levin 		if (buf == NULL)
5785f225124SSasha Levin 			die("Failed allocating new net buffer");
5795f225124SSasha Levin 		cur = strtok(buf, ",=");
5805f225124SSasha Levin 	}
5815f225124SSasha Levin 
5825f225124SSasha Levin 	p = (struct virtio_net_params) {
5835f225124SSasha Levin 		.guest_ip	= DEFAULT_GUEST_ADDR,
5845f225124SSasha Levin 		.host_ip	= DEFAULT_HOST_ADDR,
5855f225124SSasha Levin 		.script		= DEFAULT_SCRIPT,
5865f225124SSasha Levin 		.mode		= NET_MODE_TAP,
5875f225124SSasha Levin 	};
5885f225124SSasha Levin 
5895f225124SSasha Levin 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
5905f225124SSasha Levin 	p.guest_mac[5] += kvm->cfg.num_net_devices;
5915f225124SSasha Levin 
5925f225124SSasha Levin 	while (cur) {
5935f225124SSasha Levin 		if (on_cmd) {
5945f225124SSasha Levin 			cmd = cur;
5955f225124SSasha Levin 		} else {
5964346fd8fSSasha Levin 			if (set_net_param(kvm, &p, cmd, cur) < 0)
5975f225124SSasha Levin 				goto done;
5985f225124SSasha Levin 		}
5995f225124SSasha Levin 		on_cmd = !on_cmd;
6005f225124SSasha Levin 
6015f225124SSasha Levin 		cur = strtok(NULL, ",=");
6025f225124SSasha Levin 	};
6035f225124SSasha Levin 
6045f225124SSasha Levin 	kvm->cfg.num_net_devices++;
6055f225124SSasha Levin 
6065f225124SSasha Levin 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
6075f225124SSasha Levin 	if (kvm->cfg.net_params == NULL)
6085f225124SSasha Levin 		die("Failed adding new network device");
6095f225124SSasha Levin 
6105f225124SSasha Levin 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
6115f225124SSasha Levin 
6125f225124SSasha Levin done:
6135f225124SSasha Levin 	free(buf);
6145f225124SSasha Levin 	return 0;
6155f225124SSasha Levin }
6165f225124SSasha Levin 
6175f225124SSasha Levin static int virtio_net__init_one(struct virtio_net_params *params)
6184f56d42cSAsias He {
619b5ee1ea7SAsias He 	int i;
6209a6d73f1SSasha Levin 	struct net_dev *ndev;
6219a6d73f1SSasha Levin 
6229a6d73f1SSasha Levin 	ndev = calloc(1, sizeof(struct net_dev));
6239a6d73f1SSasha Levin 	if (ndev == NULL)
6245f225124SSasha Levin 		return -ENOMEM;
6259a6d73f1SSasha Levin 
6269a6d73f1SSasha Levin 	list_add_tail(&ndev->list, &ndevs);
6279a6d73f1SSasha Levin 
6289a6d73f1SSasha Levin 	ndev->kvm = params->kvm;
6299a6d73f1SSasha Levin 
6309a6d73f1SSasha Levin 	mutex_init(&ndev->mutex);
6319a6d73f1SSasha Levin 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
6322449f6e3SSasha Levin 
6330c54698eSAsias He 	for (i = 0 ; i < 6 ; i++) {
6349a6d73f1SSasha Levin 		ndev->config.mac[i]		= params->guest_mac[i];
6359a6d73f1SSasha Levin 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
6369a6d73f1SSasha Levin 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
6370c54698eSAsias He 	}
638f715177dSAsias He 
6399a6d73f1SSasha Levin 	ndev->mode = params->mode;
6409a6d73f1SSasha Levin 	if (ndev->mode == NET_MODE_TAP) {
6419a6d73f1SSasha Levin 		if (!virtio_net__tap_init(params, ndev))
642db051127SMichael Ellerman 			die_perror("You have requested a TAP device, but creation of one has failed because");
6439a6d73f1SSasha Levin 		ndev->ops = &tap_ops;
644b4fdde6dSAsias He 	} else {
6459a6d73f1SSasha Levin 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
6469a6d73f1SSasha Levin 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
6479a6d73f1SSasha Levin 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
6489a6d73f1SSasha Levin 		ndev->info.buf_nr		= 20,
6499a6d73f1SSasha Levin 		uip_init(&ndev->info);
6509a6d73f1SSasha Levin 		ndev->ops = &uip_ops;
651b4fdde6dSAsias He 	}
652b5ee1ea7SAsias He 
65369205aa1SAsias He 	if (params->trans && strcmp(params->trans, "mmio") == 0)
6544346fd8fSSasha Levin 		virtio_init(params->kvm, ndev, &ndev->vdev, &net_dev_virtio_ops,
65569205aa1SAsias He 			    VIRTIO_MMIO, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
65669205aa1SAsias He 	else
6574346fd8fSSasha Levin 		virtio_init(params->kvm, ndev, &ndev->vdev, &net_dev_virtio_ops,
65802eca50cSAsias He 			    VIRTIO_PCI, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
65927ab67f5SSasha Levin 
6609ed67cdcSSasha Levin 	if (params->vhost)
6619ed67cdcSSasha Levin 		virtio_net__vhost_init(params->kvm, ndev);
6629ed67cdcSSasha Levin 	else
6639a6d73f1SSasha Levin 		virtio_net__io_thread_init(params->kvm, ndev);
664cb83de6fSSasha Levin 
665d278197dSAsias He 	if (compat_id == -1)
66652f34d2cSAsias He 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
6675f225124SSasha Levin 
6685f225124SSasha Levin 	return 0;
6695f225124SSasha Levin }
6705f225124SSasha Levin 
6715f225124SSasha Levin int virtio_net__init(struct kvm *kvm)
6725f225124SSasha Levin {
6735f225124SSasha Levin 	int i;
6745f225124SSasha Levin 
6755f225124SSasha Levin 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
6765f225124SSasha Levin 		kvm->cfg.net_params[i].kvm = kvm;
6775f225124SSasha Levin 		virtio_net__init_one(&kvm->cfg.net_params[i]);
6785f225124SSasha Levin 	}
6795f225124SSasha Levin 
6805f225124SSasha Levin 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
6815f225124SSasha Levin 		struct virtio_net_params net_params;
6825f225124SSasha Levin 
6835f225124SSasha Levin 		net_params = (struct virtio_net_params) {
6845f225124SSasha Levin 			.guest_ip	= kvm->cfg.guest_ip,
6855f225124SSasha Levin 			.host_ip	= kvm->cfg.host_ip,
6865f225124SSasha Levin 			.kvm		= kvm,
6875f225124SSasha Levin 			.script		= kvm->cfg.script,
6885f225124SSasha Levin 			.mode		= NET_MODE_USER,
6895f225124SSasha Levin 		};
6905f225124SSasha Levin 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
6915f225124SSasha Levin 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
6925f225124SSasha Levin 
6935f225124SSasha Levin 		virtio_net__init_one(&net_params);
6945f225124SSasha Levin 	}
6955f225124SSasha Levin 
6965f225124SSasha Levin 	return 0;
6975f225124SSasha Levin }
69849a8afd1SSasha Levin virtio_dev_init(virtio_net__init);
6995f225124SSasha Levin 
/* Device-exit entry point: nothing is torn down here; always reports success. */
int virtio_net__exit(struct kvm *kvm)
{
	const int status = 0;

	return status;
}
virtio_dev_exit(virtio_net__exit);
705