1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VDUSE: vDPA Device in Userspace
4  *
5  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6  *
7  * Author: Xie Yongji <xieyongji@bytedance.com>
8  *
9  */
10 
11 #include <linux/virtio_net.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34 
35 #include "iova_domain.h"
36 
37 #define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
38 #define DRV_DESC     "vDPA Device in Userspace"
39 #define DRV_LICENSE  "GPL v2"
40 
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
48 
49 #define IRQ_UNBOUND -1
50 
51 struct vduse_virtqueue {
52 	u16 index;
53 	u16 num_max;
54 	u32 num;
55 	u64 desc_addr;
56 	u64 driver_addr;
57 	u64 device_addr;
58 	struct vdpa_vq_state state;
59 	bool ready;
60 	bool kicked;
61 	spinlock_t kick_lock;
62 	spinlock_t irq_lock;
63 	struct eventfd_ctx *kickfd;
64 	struct vdpa_callback cb;
65 	struct work_struct inject;
66 	struct work_struct kick;
67 	int irq_effective_cpu;
68 	struct cpumask irq_affinity;
69 	struct kobject kobj;
70 };
71 
72 struct vduse_dev;
73 
74 struct vduse_vdpa {
75 	struct vdpa_device vdpa;
76 	struct vduse_dev *dev;
77 };
78 
79 struct vduse_umem {
80 	unsigned long iova;
81 	unsigned long npages;
82 	struct page **pages;
83 	struct mm_struct *mm;
84 };
85 
86 struct vduse_dev {
87 	struct vduse_vdpa *vdev;
88 	struct device *dev;
89 	struct vduse_virtqueue **vqs;
90 	struct vduse_iova_domain *domain;
91 	char *name;
92 	struct mutex lock;
93 	spinlock_t msg_lock;
94 	u64 msg_unique;
95 	u32 msg_timeout;
96 	wait_queue_head_t waitq;
97 	struct list_head send_list;
98 	struct list_head recv_list;
99 	struct vdpa_callback config_cb;
100 	struct work_struct inject;
101 	spinlock_t irq_lock;
102 	struct rw_semaphore rwsem;
103 	int minor;
104 	bool broken;
105 	bool connected;
106 	u64 api_version;
107 	u64 device_features;
108 	u64 driver_features;
109 	u32 device_id;
110 	u32 vendor_id;
111 	u32 generation;
112 	u32 config_size;
113 	void *config;
114 	u8 status;
115 	u32 vq_num;
116 	u32 vq_align;
117 	struct vduse_umem *umem;
118 	struct mutex mem_lock;
119 	unsigned int bounce_size;
120 	struct mutex domain_lock;
121 };
122 
123 struct vduse_dev_msg {
124 	struct vduse_dev_request req;
125 	struct vduse_dev_response resp;
126 	struct list_head list;
127 	wait_queue_head_t waitq;
128 	bool completed;
129 };
130 
131 struct vduse_control {
132 	u64 api_version;
133 };
134 
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
137 
138 static dev_t vduse_major;
139 static struct cdev vduse_ctrl_cdev;
140 static struct cdev vduse_cdev;
141 static struct workqueue_struct *vduse_irq_wq;
142 static struct workqueue_struct *vduse_irq_bound_wq;
143 
144 static u32 allowed_device_id[] = {
145 	VIRTIO_ID_BLOCK,
146 	VIRTIO_ID_NET,
147 	VIRTIO_ID_FS,
148 };
149 
150 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
151 {
152 	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
153 
154 	return vdev->dev;
155 }
156 
157 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
158 {
159 	struct vdpa_device *vdpa = dev_to_vdpa(dev);
160 
161 	return vdpa_to_vduse(vdpa);
162 }
163 
164 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
165 					    uint32_t request_id)
166 {
167 	struct vduse_dev_msg *msg;
168 
169 	list_for_each_entry(msg, head, list) {
170 		if (msg->req.request_id == request_id) {
171 			list_del(&msg->list);
172 			return msg;
173 		}
174 	}
175 
176 	return NULL;
177 }
178 
179 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
180 {
181 	struct vduse_dev_msg *msg = NULL;
182 
183 	if (!list_empty(head)) {
184 		msg = list_first_entry(head, struct vduse_dev_msg, list);
185 		list_del(&msg->list);
186 	}
187 
188 	return msg;
189 }
190 
191 static void vduse_enqueue_msg(struct list_head *head,
192 			      struct vduse_dev_msg *msg)
193 {
194 	list_add_tail(&msg->list, head);
195 }
196 
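/*
 * Fail every in-flight request and mark the device broken so that
 * subsequent messages bail out early.  Caller must hold msg_lock.
 */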
197 static void vduse_dev_broken(struct vduse_dev *dev)
198 {
199 	struct vduse_dev_msg *msg, *tmp;
200 
201 	if (unlikely(dev->broken))
202 		return;
203 
204 	list_splice_init(&dev->recv_list, &dev->send_list);
205 	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
206 		list_del(&msg->list);
207 		msg->completed = 1;
208 		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
209 		wake_up(&msg->waitq);
210 	}
211 	dev->broken = true;
212 	wake_up(&dev->waitq);
213 }
214 
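/*
 * Send a request to the userspace daemon and wait (killable, optionally
 * bounded by msg_timeout) for the reply.  A timeout marks the device broken.
 */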
215 static int vduse_dev_msg_sync(struct vduse_dev *dev,
216 			      struct vduse_dev_msg *msg)
217 {
218 	int ret;
219 
220 	if (unlikely(dev->broken))
221 		return -EIO;
222 
223 	init_waitqueue_head(&msg->waitq);
224 	spin_lock(&dev->msg_lock);
225 	if (unlikely(dev->broken)) {
226 		spin_unlock(&dev->msg_lock);
227 		return -EIO;
228 	}
229 	msg->req.request_id = dev->msg_unique++;
230 	vduse_enqueue_msg(&dev->send_list, msg);
231 	wake_up(&dev->waitq);
232 	spin_unlock(&dev->msg_lock);
233 	if (dev->msg_timeout)
234 		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
235 						  (long)dev->msg_timeout * HZ);
236 	else
237 		ret = wait_event_killable(msg->waitq, msg->completed);
238 
239 	spin_lock(&dev->msg_lock);
240 	if (!msg->completed) {
241 		list_del(&msg->list);
242 		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
243 		/* Mark the device as malfunctioning when there is a timeout */
244 		if (!ret)
245 			vduse_dev_broken(dev);
246 	}
247 	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
248 	spin_unlock(&dev->msg_lock);
249 
250 	return ret;
251 }
252 
253 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
254 					 struct vduse_virtqueue *vq,
255 					 struct vdpa_vq_state_packed *packed)
256 {
257 	struct vduse_dev_msg msg = { 0 };
258 	int ret;
259 
260 	msg.req.type = VDUSE_GET_VQ_STATE;
261 	msg.req.vq_state.index = vq->index;
262 
263 	ret = vduse_dev_msg_sync(dev, &msg);
264 	if (ret)
265 		return ret;
266 
267 	packed->last_avail_counter =
268 			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
269 	packed->last_avail_idx =
270 			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
271 	packed->last_used_counter =
272 			msg.resp.vq_state.packed.last_used_counter & 0x0001;
273 	packed->last_used_idx =
274 			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
275 
276 	return 0;
277 }
278 
279 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
280 					struct vduse_virtqueue *vq,
281 					struct vdpa_vq_state_split *split)
282 {
283 	struct vduse_dev_msg msg = { 0 };
284 	int ret;
285 
286 	msg.req.type = VDUSE_GET_VQ_STATE;
287 	msg.req.vq_state.index = vq->index;
288 
289 	ret = vduse_dev_msg_sync(dev, &msg);
290 	if (ret)
291 		return ret;
292 
293 	split->avail_index = msg.resp.vq_state.split.avail_index;
294 
295 	return 0;
296 }
297 
298 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
299 {
300 	struct vduse_dev_msg msg = { 0 };
301 
302 	msg.req.type = VDUSE_SET_STATUS;
303 	msg.req.s.status = status;
304 
305 	return vduse_dev_msg_sync(dev, &msg);
306 }
307 
308 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
309 				  u64 start, u64 last)
310 {
311 	struct vduse_dev_msg msg = { 0 };
312 
313 	if (last < start)
314 		return -EINVAL;
315 
316 	msg.req.type = VDUSE_UPDATE_IOTLB;
317 	msg.req.iova.start = start;
318 	msg.req.iova.last = last;
319 
320 	return vduse_dev_msg_sync(dev, &msg);
321 }
322 
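/*
 * read() on the device fd: hand the next pending request to the userspace
 * daemon and park it on recv_list until the matching reply is written back.
 */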
323 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
324 {
325 	struct file *file = iocb->ki_filp;
326 	struct vduse_dev *dev = file->private_data;
327 	struct vduse_dev_msg *msg;
328 	int size = sizeof(struct vduse_dev_request);
329 	ssize_t ret;
330 
331 	if (iov_iter_count(to) < size)
332 		return -EINVAL;
333 
334 	spin_lock(&dev->msg_lock);
335 	while (1) {
336 		msg = vduse_dequeue_msg(&dev->send_list);
337 		if (msg)
338 			break;
339 
340 		ret = -EAGAIN;
341 		if (file->f_flags & O_NONBLOCK)
342 			goto unlock;
343 
344 		spin_unlock(&dev->msg_lock);
345 		ret = wait_event_interruptible_exclusive(dev->waitq,
346 					!list_empty(&dev->send_list));
347 		if (ret)
348 			return ret;
349 
350 		spin_lock(&dev->msg_lock);
351 	}
352 	spin_unlock(&dev->msg_lock);
353 	ret = copy_to_iter(&msg->req, size, to);
354 	spin_lock(&dev->msg_lock);
355 	if (ret != size) {
356 		ret = -EFAULT;
357 		vduse_enqueue_msg(&dev->send_list, msg);
358 		goto unlock;
359 	}
360 	vduse_enqueue_msg(&dev->recv_list, msg);
361 unlock:
362 	spin_unlock(&dev->msg_lock);
363 
364 	return ret;
365 }
366 
367 static bool is_mem_zero(const char *ptr, int size)
368 {
369 	int i;
370 
371 	for (i = 0; i < size; i++) {
372 		if (ptr[i])
373 			return false;
374 	}
375 	return true;
376 }
377 
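/*
 * write() on the device fd: match the response against a pending request
 * on recv_list and wake up its waiter.
 */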
378 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
379 {
380 	struct file *file = iocb->ki_filp;
381 	struct vduse_dev *dev = file->private_data;
382 	struct vduse_dev_response resp;
383 	struct vduse_dev_msg *msg;
384 	size_t ret;
385 
386 	ret = copy_from_iter(&resp, sizeof(resp), from);
387 	if (ret != sizeof(resp))
388 		return -EINVAL;
389 
390 	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
391 		return -EINVAL;
392 
393 	spin_lock(&dev->msg_lock);
394 	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
395 	if (!msg) {
396 		ret = -ENOENT;
397 		goto unlock;
398 	}
399 
400 	memcpy(&msg->resp, &resp, sizeof(resp));
401 	msg->completed = 1;
402 	wake_up(&msg->waitq);
403 unlock:
404 	spin_unlock(&dev->msg_lock);
405 
406 	return ret;
407 }
408 
409 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
410 {
411 	struct vduse_dev *dev = file->private_data;
412 	__poll_t mask = 0;
413 
414 	poll_wait(file, &dev->waitq, wait);
415 
416 	spin_lock(&dev->msg_lock);
417 
418 	if (unlikely(dev->broken))
419 		mask |= EPOLLERR;
420 	if (!list_empty(&dev->send_list))
421 		mask |= EPOLLIN | EPOLLRDNORM;
422 	if (!list_empty(&dev->recv_list))
423 		mask |= EPOLLOUT | EPOLLWRNORM;
424 
425 	spin_unlock(&dev->msg_lock);
426 
427 	return mask;
428 }
429 
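/*
 * Return the device to its initial state: clear status and negotiated
 * features, drop callbacks and kickfds, and flush pending injection/kick
 * work while holding rwsem to fence concurrent interrupt injection.
 */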
430 static void vduse_dev_reset(struct vduse_dev *dev)
431 {
432 	int i;
433 	struct vduse_iova_domain *domain = dev->domain;
434 
435 	/* The coherent mappings are handled in vduse_dev_free_coherent() */
436 	if (domain && domain->bounce_map)
437 		vduse_domain_reset_bounce_map(domain);
438 
439 	down_write(&dev->rwsem);
440 
441 	dev->status = 0;
442 	dev->driver_features = 0;
443 	dev->generation++;
444 	spin_lock(&dev->irq_lock);
445 	dev->config_cb.callback = NULL;
446 	dev->config_cb.private = NULL;
447 	spin_unlock(&dev->irq_lock);
448 	flush_work(&dev->inject);
449 
450 	for (i = 0; i < dev->vq_num; i++) {
451 		struct vduse_virtqueue *vq = dev->vqs[i];
452 
453 		vq->ready = false;
454 		vq->desc_addr = 0;
455 		vq->driver_addr = 0;
456 		vq->device_addr = 0;
457 		vq->num = 0;
458 		memset(&vq->state, 0, sizeof(vq->state));
459 
460 		spin_lock(&vq->kick_lock);
461 		vq->kicked = false;
462 		if (vq->kickfd)
463 			eventfd_ctx_put(vq->kickfd);
464 		vq->kickfd = NULL;
465 		spin_unlock(&vq->kick_lock);
466 
467 		spin_lock(&vq->irq_lock);
468 		vq->cb.callback = NULL;
469 		vq->cb.private = NULL;
470 		vq->cb.trigger = NULL;
471 		spin_unlock(&vq->irq_lock);
472 		flush_work(&vq->inject);
473 		flush_work(&vq->kick);
474 	}
475 
476 	up_write(&dev->rwsem);
477 }
478 
479 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
480 				u64 desc_area, u64 driver_area,
481 				u64 device_area)
482 {
483 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
484 	struct vduse_virtqueue *vq = dev->vqs[idx];
485 
486 	vq->desc_addr = desc_area;
487 	vq->driver_addr = driver_area;
488 	vq->device_addr = device_area;
489 
490 	return 0;
491 }
492 
493 static void vduse_vq_kick(struct vduse_virtqueue *vq)
494 {
495 	spin_lock(&vq->kick_lock);
496 	if (!vq->ready)
497 		goto unlock;
498 
499 	if (vq->kickfd)
500 		eventfd_signal(vq->kickfd);
501 	else
502 		vq->kicked = true;
503 unlock:
504 	spin_unlock(&vq->kick_lock);
505 }
506 
507 static void vduse_vq_kick_work(struct work_struct *work)
508 {
509 	struct vduse_virtqueue *vq = container_of(work,
510 					struct vduse_virtqueue, kick);
511 
512 	vduse_vq_kick(vq);
513 }
514 
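/*
 * Kick the virtqueue directly when the eventfd can be signalled from this
 * context; otherwise defer the kick to a workqueue.
 */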
515 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
516 {
517 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
518 	struct vduse_virtqueue *vq = dev->vqs[idx];
519 
520 	if (!eventfd_signal_allowed()) {
521 		schedule_work(&vq->kick);
522 		return;
523 	}
524 	vduse_vq_kick(vq);
525 }
526 
527 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
528 			      struct vdpa_callback *cb)
529 {
530 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
531 	struct vduse_virtqueue *vq = dev->vqs[idx];
532 
533 	spin_lock(&vq->irq_lock);
534 	vq->cb.callback = cb->callback;
535 	vq->cb.private = cb->private;
536 	vq->cb.trigger = cb->trigger;
537 	spin_unlock(&vq->irq_lock);
538 }
539 
540 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
541 {
542 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
543 	struct vduse_virtqueue *vq = dev->vqs[idx];
544 
545 	vq->num = num;
546 }
547 
548 static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
549 {
550 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
551 	struct vduse_virtqueue *vq = dev->vqs[idx];
552 
553 	if (vq->num)
554 		return vq->num;
555 	else
556 		return vq->num_max;
557 }
558 
559 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
560 					u16 idx, bool ready)
561 {
562 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
563 	struct vduse_virtqueue *vq = dev->vqs[idx];
564 
565 	vq->ready = ready;
566 }
567 
568 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
569 {
570 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
571 	struct vduse_virtqueue *vq = dev->vqs[idx];
572 
573 	return vq->ready;
574 }
575 
576 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
577 				const struct vdpa_vq_state *state)
578 {
579 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
580 	struct vduse_virtqueue *vq = dev->vqs[idx];
581 
582 	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
583 		vq->state.packed.last_avail_counter =
584 				state->packed.last_avail_counter;
585 		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
586 		vq->state.packed.last_used_counter =
587 				state->packed.last_used_counter;
588 		vq->state.packed.last_used_idx = state->packed.last_used_idx;
589 	} else
590 		vq->state.split.avail_index = state->split.avail_index;
591 
592 	return 0;
593 }
594 
595 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
596 				struct vdpa_vq_state *state)
597 {
598 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
599 	struct vduse_virtqueue *vq = dev->vqs[idx];
600 
601 	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
602 		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
603 
604 	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
605 }
606 
607 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
608 {
609 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
610 
611 	return dev->vq_align;
612 }
613 
614 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
615 {
616 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
617 
618 	return dev->device_features;
619 }
620 
621 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
622 {
623 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
624 
625 	dev->driver_features = features;
626 	return 0;
627 }
628 
629 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
630 {
631 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
632 
633 	return dev->driver_features;
634 }
635 
636 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
637 				  struct vdpa_callback *cb)
638 {
639 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
640 
641 	spin_lock(&dev->irq_lock);
642 	dev->config_cb.callback = cb->callback;
643 	dev->config_cb.private = cb->private;
644 	spin_unlock(&dev->irq_lock);
645 }
646 
647 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
648 {
649 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
650 	u16 num_max = 0;
651 	int i;
652 
653 	for (i = 0; i < dev->vq_num; i++)
654 		if (num_max < dev->vqs[i]->num_max)
655 			num_max = dev->vqs[i]->num_max;
656 
657 	return num_max;
658 }
659 
660 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
661 {
662 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
663 
664 	return dev->device_id;
665 }
666 
667 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
668 {
669 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
670 
671 	return dev->vendor_id;
672 }
673 
674 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
675 {
676 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
677 
678 	return dev->status;
679 }
680 
681 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
682 {
683 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
684 
685 	if (vduse_dev_set_status(dev, status))
686 		return;
687 
688 	dev->status = status;
689 }
690 
691 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
692 {
693 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
694 
695 	return dev->config_size;
696 }
697 
698 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
699 				  void *buf, unsigned int len)
700 {
701 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
702 
703 	/* Initialize the buffer in case of partial copy. */
704 	memset(buf, 0, len);
705 
706 	if (offset > dev->config_size)
707 		return;
708 
709 	if (len > dev->config_size - offset)
710 		len = dev->config_size - offset;
711 
712 	memcpy(buf, dev->config + offset, len);
713 }
714 
715 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
716 			const void *buf, unsigned int len)
717 {
718 	/* Now we only support read-only configuration space */
719 }
720 
721 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
722 {
723 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
724 	int ret = vduse_dev_set_status(dev, 0);
725 
726 	vduse_dev_reset(dev);
727 
728 	return ret;
729 }
730 
731 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
732 {
733 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
734 
735 	return dev->generation;
736 }
737 
738 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
739 				      const struct cpumask *cpu_mask)
740 {
741 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
742 
743 	if (cpu_mask)
744 		cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
745 	else
746 		cpumask_setall(&dev->vqs[idx]->irq_affinity);
747 
748 	return 0;
749 }
750 
751 static const struct cpumask *
752 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
753 {
754 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
755 
756 	return &dev->vqs[idx]->irq_affinity;
757 }
758 
759 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
760 				unsigned int asid,
761 				struct vhost_iotlb *iotlb)
762 {
763 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
764 	int ret;
765 
766 	ret = vduse_domain_set_map(dev->domain, iotlb);
767 	if (ret)
768 		return ret;
769 
770 	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
771 	if (ret) {
772 		vduse_domain_clear_map(dev->domain, iotlb);
773 		return ret;
774 	}
775 
776 	return 0;
777 }
778 
779 static void vduse_vdpa_free(struct vdpa_device *vdpa)
780 {
781 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
782 
783 	dev->vdev = NULL;
784 }
785 
786 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
787 	.set_vq_address		= vduse_vdpa_set_vq_address,
788 	.kick_vq		= vduse_vdpa_kick_vq,
789 	.set_vq_cb		= vduse_vdpa_set_vq_cb,
790 	.set_vq_num             = vduse_vdpa_set_vq_num,
791 	.get_vq_size		= vduse_vdpa_get_vq_size,
792 	.set_vq_ready		= vduse_vdpa_set_vq_ready,
793 	.get_vq_ready		= vduse_vdpa_get_vq_ready,
794 	.set_vq_state		= vduse_vdpa_set_vq_state,
795 	.get_vq_state		= vduse_vdpa_get_vq_state,
796 	.get_vq_align		= vduse_vdpa_get_vq_align,
797 	.get_device_features	= vduse_vdpa_get_device_features,
798 	.set_driver_features	= vduse_vdpa_set_driver_features,
799 	.get_driver_features	= vduse_vdpa_get_driver_features,
800 	.set_config_cb		= vduse_vdpa_set_config_cb,
801 	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
802 	.get_device_id		= vduse_vdpa_get_device_id,
803 	.get_vendor_id		= vduse_vdpa_get_vendor_id,
804 	.get_status		= vduse_vdpa_get_status,
805 	.set_status		= vduse_vdpa_set_status,
806 	.get_config_size	= vduse_vdpa_get_config_size,
807 	.get_config		= vduse_vdpa_get_config,
808 	.set_config		= vduse_vdpa_set_config,
809 	.get_generation		= vduse_vdpa_get_generation,
810 	.set_vq_affinity	= vduse_vdpa_set_vq_affinity,
811 	.get_vq_affinity	= vduse_vdpa_get_vq_affinity,
812 	.reset			= vduse_vdpa_reset,
813 	.set_map		= vduse_vdpa_set_map,
814 	.free			= vduse_vdpa_free,
815 };
816 
817 static void vduse_dev_sync_single_for_device(struct device *dev,
818 					     dma_addr_t dma_addr, size_t size,
819 					     enum dma_data_direction dir)
820 {
821 	struct vduse_dev *vdev = dev_to_vduse(dev);
822 	struct vduse_iova_domain *domain = vdev->domain;
823 
824 	vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
825 }
826 
827 static void vduse_dev_sync_single_for_cpu(struct device *dev,
828 					     dma_addr_t dma_addr, size_t size,
829 					     enum dma_data_direction dir)
830 {
831 	struct vduse_dev *vdev = dev_to_vduse(dev);
832 	struct vduse_iova_domain *domain = vdev->domain;
833 
834 	vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
835 }
836 
837 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
838 				     unsigned long offset, size_t size,
839 				     enum dma_data_direction dir,
840 				     unsigned long attrs)
841 {
842 	struct vduse_dev *vdev = dev_to_vduse(dev);
843 	struct vduse_iova_domain *domain = vdev->domain;
844 
845 	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
846 }
847 
848 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
849 				size_t size, enum dma_data_direction dir,
850 				unsigned long attrs)
851 {
852 	struct vduse_dev *vdev = dev_to_vduse(dev);
853 	struct vduse_iova_domain *domain = vdev->domain;
854 
855 	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
856 }
857 
858 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
859 					dma_addr_t *dma_addr, gfp_t flag,
860 					unsigned long attrs)
861 {
862 	struct vduse_dev *vdev = dev_to_vduse(dev);
863 	struct vduse_iova_domain *domain = vdev->domain;
864 	unsigned long iova;
865 	void *addr;
866 
867 	*dma_addr = DMA_MAPPING_ERROR;
868 	addr = vduse_domain_alloc_coherent(domain, size,
869 				(dma_addr_t *)&iova, flag, attrs);
870 	if (!addr)
871 		return NULL;
872 
873 	*dma_addr = (dma_addr_t)iova;
874 
875 	return addr;
876 }
877 
878 static void vduse_dev_free_coherent(struct device *dev, size_t size,
879 					void *vaddr, dma_addr_t dma_addr,
880 					unsigned long attrs)
881 {
882 	struct vduse_dev *vdev = dev_to_vduse(dev);
883 	struct vduse_iova_domain *domain = vdev->domain;
884 
885 	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
886 }
887 
888 static size_t vduse_dev_max_mapping_size(struct device *dev)
889 {
890 	struct vduse_dev *vdev = dev_to_vduse(dev);
891 	struct vduse_iova_domain *domain = vdev->domain;
892 
893 	return domain->bounce_size;
894 }
895 
896 static const struct dma_map_ops vduse_dev_dma_ops = {
897 	.sync_single_for_device = vduse_dev_sync_single_for_device,
898 	.sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
899 	.map_page = vduse_dev_map_page,
900 	.unmap_page = vduse_dev_unmap_page,
901 	.alloc = vduse_dev_alloc_coherent,
902 	.free = vduse_dev_free_coherent,
903 	.max_mapping_size = vduse_dev_max_mapping_size,
904 };
905 
906 static unsigned int perm_to_file_flags(u8 perm)
907 {
908 	unsigned int flags = 0;
909 
910 	switch (perm) {
911 	case VDUSE_ACCESS_WO:
912 		flags |= O_WRONLY;
913 		break;
914 	case VDUSE_ACCESS_RO:
915 		flags |= O_RDONLY;
916 		break;
917 	case VDUSE_ACCESS_RW:
918 		flags |= O_RDWR;
919 		break;
920 	default:
921 		WARN(1, "invalid vhost IOTLB permission\n");
922 		break;
923 	}
924 
925 	return flags;
926 }
927 
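/*
 * Assign or deassign the per-virtqueue kick eventfd.  A kick that arrived
 * while no eventfd was installed is delivered immediately.
 */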
928 static int vduse_kickfd_setup(struct vduse_dev *dev,
929 			struct vduse_vq_eventfd *eventfd)
930 {
931 	struct eventfd_ctx *ctx = NULL;
932 	struct vduse_virtqueue *vq;
933 	u32 index;
934 
935 	if (eventfd->index >= dev->vq_num)
936 		return -EINVAL;
937 
938 	index = array_index_nospec(eventfd->index, dev->vq_num);
939 	vq = dev->vqs[index];
940 	if (eventfd->fd >= 0) {
941 		ctx = eventfd_ctx_fdget(eventfd->fd);
942 		if (IS_ERR(ctx))
943 			return PTR_ERR(ctx);
944 	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
945 		return 0;
946 
947 	spin_lock(&vq->kick_lock);
948 	if (vq->kickfd)
949 		eventfd_ctx_put(vq->kickfd);
950 	vq->kickfd = ctx;
951 	if (vq->ready && vq->kicked && vq->kickfd) {
952 		eventfd_signal(vq->kickfd);
953 		vq->kicked = false;
954 	}
955 	spin_unlock(&vq->kick_lock);
956 
957 	return 0;
958 }
959 
960 static bool vduse_dev_is_ready(struct vduse_dev *dev)
961 {
962 	int i;
963 
964 	for (i = 0; i < dev->vq_num; i++)
965 		if (!dev->vqs[i]->num_max)
966 			return false;
967 
968 	return true;
969 }
970 
971 static void vduse_dev_irq_inject(struct work_struct *work)
972 {
973 	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
974 
975 	spin_lock_bh(&dev->irq_lock);
976 	if (dev->config_cb.callback)
977 		dev->config_cb.callback(dev->config_cb.private);
978 	spin_unlock_bh(&dev->irq_lock);
979 }
980 
981 static void vduse_vq_irq_inject(struct work_struct *work)
982 {
983 	struct vduse_virtqueue *vq = container_of(work,
984 					struct vduse_virtqueue, inject);
985 
986 	spin_lock_bh(&vq->irq_lock);
987 	if (vq->ready && vq->cb.callback)
988 		vq->cb.callback(vq->cb.private);
989 	spin_unlock_bh(&vq->irq_lock);
990 }
991 
992 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
993 {
994 	bool signal = false;
995 
996 	if (!vq->cb.trigger)
997 		return false;
998 
999 	spin_lock_irq(&vq->irq_lock);
1000 	if (vq->ready && vq->cb.trigger) {
1001 		eventfd_signal(vq->cb.trigger);
1002 		signal = true;
1003 	}
1004 	spin_unlock_irq(&vq->irq_lock);
1005 
1006 	return signal;
1007 }
1008 
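/*
 * Queue interrupt-injection work, either on the unbound workqueue or bound
 * to a specific CPU.  Only allowed once the driver has set DRIVER_OK.
 */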
1009 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1010 				    struct work_struct *irq_work,
1011 				    int irq_effective_cpu)
1012 {
1013 	int ret = -EINVAL;
1014 
1015 	down_read(&dev->rwsem);
1016 	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1017 		goto unlock;
1018 
1019 	ret = 0;
1020 	if (irq_effective_cpu == IRQ_UNBOUND)
1021 		queue_work(vduse_irq_wq, irq_work);
1022 	else
1023 		queue_work_on(irq_effective_cpu,
1024 			      vduse_irq_bound_wq, irq_work);
1025 unlock:
1026 	up_read(&dev->rwsem);
1027 
1028 	return ret;
1029 }
1030 
1031 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
1032 				u64 iova, u64 size)
1033 {
1034 	int ret;
1035 
1036 	mutex_lock(&dev->mem_lock);
1037 	ret = -ENOENT;
1038 	if (!dev->umem)
1039 		goto unlock;
1040 
1041 	ret = -EINVAL;
1042 	if (!dev->domain)
1043 		goto unlock;
1044 
1045 	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1046 		goto unlock;
1047 
1048 	vduse_domain_remove_user_bounce_pages(dev->domain);
1049 	unpin_user_pages_dirty_lock(dev->umem->pages,
1050 				    dev->umem->npages, true);
1051 	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1052 	mmdrop(dev->umem->mm);
1053 	vfree(dev->umem->pages);
1054 	kfree(dev->umem);
1055 	dev->umem = NULL;
1056 	ret = 0;
1057 unlock:
1058 	mutex_unlock(&dev->mem_lock);
1059 	return ret;
1060 }
1061 
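/*
 * Register userspace memory as the bounce buffer: pin the pages (accounted
 * against RLIMIT_MEMLOCK) and switch the IOVA domain over to them.
 */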
1062 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1063 			      u64 iova, u64 uaddr, u64 size)
1064 {
1065 	struct page **page_list = NULL;
1066 	struct vduse_umem *umem = NULL;
1067 	long pinned = 0;
1068 	unsigned long npages, lock_limit;
1069 	int ret;
1070 
1071 	if (!dev->domain || !dev->domain->bounce_map ||
1072 	    size != dev->domain->bounce_size ||
1073 	    iova != 0 || uaddr & ~PAGE_MASK)
1074 		return -EINVAL;
1075 
1076 	mutex_lock(&dev->mem_lock);
1077 	ret = -EEXIST;
1078 	if (dev->umem)
1079 		goto unlock;
1080 
1081 	ret = -ENOMEM;
1082 	npages = size >> PAGE_SHIFT;
1083 	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1084 			      GFP_KERNEL_ACCOUNT);
1085 	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1086 	if (!page_list || !umem)
1087 		goto unlock;
1088 
1089 	mmap_read_lock(current->mm);
1090 
1091 	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1092 	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1093 		goto out;
1094 
1095 	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1096 				page_list);
1097 	if (pinned != npages) {
1098 		ret = pinned < 0 ? pinned : -ENOMEM;
1099 		goto out;
1100 	}
1101 
1102 	ret = vduse_domain_add_user_bounce_pages(dev->domain,
1103 						 page_list, pinned);
1104 	if (ret)
1105 		goto out;
1106 
1107 	atomic64_add(npages, &current->mm->pinned_vm);
1108 
1109 	umem->pages = page_list;
1110 	umem->npages = pinned;
1111 	umem->iova = iova;
1112 	umem->mm = current->mm;
1113 	mmgrab(current->mm);
1114 
1115 	dev->umem = umem;
1116 out:
1117 	if (ret && pinned > 0)
1118 		unpin_user_pages(page_list, pinned);
1119 
1120 	mmap_read_unlock(current->mm);
1121 unlock:
1122 	if (ret) {
1123 		vfree(page_list);
1124 		kfree(umem);
1125 	}
1126 	mutex_unlock(&dev->mem_lock);
1127 	return ret;
1128 }
1129 
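/*
 * Pick the next online CPU from the virtqueue's affinity mask in a
 * round-robin fashion, wrapping around at the end of the mask.
 */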
1130 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1131 {
1132 	int curr_cpu = vq->irq_effective_cpu;
1133 
1134 	while (true) {
1135 		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1136 		if (cpu_online(curr_cpu))
1137 			break;
1138 
1139 		if (curr_cpu >= nr_cpu_ids)
1140 			curr_cpu = IRQ_UNBOUND;
1141 	}
1142 
1143 	vq->irq_effective_cpu = curr_cpu;
1144 }
1145 
1146 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1147 			    unsigned long arg)
1148 {
1149 	struct vduse_dev *dev = file->private_data;
1150 	void __user *argp = (void __user *)arg;
1151 	int ret;
1152 
1153 	if (unlikely(dev->broken))
1154 		return -EPERM;
1155 
1156 	switch (cmd) {
1157 	case VDUSE_IOTLB_GET_FD: {
1158 		struct vduse_iotlb_entry entry;
1159 		struct vhost_iotlb_map *map;
1160 		struct vdpa_map_file *map_file;
1161 		struct file *f = NULL;
1162 
1163 		ret = -EFAULT;
1164 		if (copy_from_user(&entry, argp, sizeof(entry)))
1165 			break;
1166 
1167 		ret = -EINVAL;
1168 		if (entry.start > entry.last)
1169 			break;
1170 
1171 		mutex_lock(&dev->domain_lock);
1172 		if (!dev->domain) {
1173 			mutex_unlock(&dev->domain_lock);
1174 			break;
1175 		}
1176 		spin_lock(&dev->domain->iotlb_lock);
1177 		map = vhost_iotlb_itree_first(dev->domain->iotlb,
1178 					      entry.start, entry.last);
1179 		if (map) {
1180 			map_file = (struct vdpa_map_file *)map->opaque;
1181 			f = get_file(map_file->file);
1182 			entry.offset = map_file->offset;
1183 			entry.start = map->start;
1184 			entry.last = map->last;
1185 			entry.perm = map->perm;
1186 		}
1187 		spin_unlock(&dev->domain->iotlb_lock);
1188 		mutex_unlock(&dev->domain_lock);
1189 		ret = -EINVAL;
1190 		if (!f)
1191 			break;
1192 
1193 		ret = -EFAULT;
1194 		if (copy_to_user(argp, &entry, sizeof(entry))) {
1195 			fput(f);
1196 			break;
1197 		}
1198 		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
1199 		fput(f);
1200 		break;
1201 	}
1202 	case VDUSE_DEV_GET_FEATURES:
1203 		/*
1204 		 * Just mirror what the driver wrote here.
1205 		 * The driver is expected to check FEATURES_OK later.
1206 		 */
1207 		ret = put_user(dev->driver_features, (u64 __user *)argp);
1208 		break;
1209 	case VDUSE_DEV_SET_CONFIG: {
1210 		struct vduse_config_data config;
1211 		unsigned long size = offsetof(struct vduse_config_data,
1212 					      buffer);
1213 
1214 		ret = -EFAULT;
1215 		if (copy_from_user(&config, argp, size))
1216 			break;
1217 
1218 		ret = -EINVAL;
1219 		if (config.offset > dev->config_size ||
1220 		    config.length == 0 ||
1221 		    config.length > dev->config_size - config.offset)
1222 			break;
1223 
1224 		ret = -EFAULT;
1225 		if (copy_from_user(dev->config + config.offset, argp + size,
1226 				   config.length))
1227 			break;
1228 
1229 		ret = 0;
1230 		break;
1231 	}
1232 	case VDUSE_DEV_INJECT_CONFIG_IRQ:
1233 		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1234 		break;
1235 	case VDUSE_VQ_SETUP: {
1236 		struct vduse_vq_config config;
1237 		u32 index;
1238 
1239 		ret = -EFAULT;
1240 		if (copy_from_user(&config, argp, sizeof(config)))
1241 			break;
1242 
1243 		ret = -EINVAL;
1244 		if (config.index >= dev->vq_num)
1245 			break;
1246 
1247 		if (!is_mem_zero((const char *)config.reserved,
1248 				 sizeof(config.reserved)))
1249 			break;
1250 
1251 		index = array_index_nospec(config.index, dev->vq_num);
1252 		dev->vqs[index]->num_max = config.max_size;
1253 		ret = 0;
1254 		break;
1255 	}
1256 	case VDUSE_VQ_GET_INFO: {
1257 		struct vduse_vq_info vq_info;
1258 		struct vduse_virtqueue *vq;
1259 		u32 index;
1260 
1261 		ret = -EFAULT;
1262 		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1263 			break;
1264 
1265 		ret = -EINVAL;
1266 		if (vq_info.index >= dev->vq_num)
1267 			break;
1268 
1269 		index = array_index_nospec(vq_info.index, dev->vq_num);
1270 		vq = dev->vqs[index];
1271 		vq_info.desc_addr = vq->desc_addr;
1272 		vq_info.driver_addr = vq->driver_addr;
1273 		vq_info.device_addr = vq->device_addr;
1274 		vq_info.num = vq->num;
1275 
1276 		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1277 			vq_info.packed.last_avail_counter =
1278 				vq->state.packed.last_avail_counter;
1279 			vq_info.packed.last_avail_idx =
1280 				vq->state.packed.last_avail_idx;
1281 			vq_info.packed.last_used_counter =
1282 				vq->state.packed.last_used_counter;
1283 			vq_info.packed.last_used_idx =
1284 				vq->state.packed.last_used_idx;
1285 		} else
1286 			vq_info.split.avail_index =
1287 				vq->state.split.avail_index;
1288 
1289 		vq_info.ready = vq->ready;
1290 
1291 		ret = -EFAULT;
1292 		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1293 			break;
1294 
1295 		ret = 0;
1296 		break;
1297 	}
1298 	case VDUSE_VQ_SETUP_KICKFD: {
1299 		struct vduse_vq_eventfd eventfd;
1300 
1301 		ret = -EFAULT;
1302 		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1303 			break;
1304 
1305 		ret = vduse_kickfd_setup(dev, &eventfd);
1306 		break;
1307 	}
1308 	case VDUSE_VQ_INJECT_IRQ: {
1309 		u32 index;
1310 
1311 		ret = -EFAULT;
1312 		if (get_user(index, (u32 __user *)argp))
1313 			break;
1314 
1315 		ret = -EINVAL;
1316 		if (index >= dev->vq_num)
1317 			break;
1318 
1319 		ret = 0;
1320 		index = array_index_nospec(index, dev->vq_num);
1321 		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1322 			vduse_vq_update_effective_cpu(dev->vqs[index]);
1323 			ret = vduse_dev_queue_irq_work(dev,
1324 						&dev->vqs[index]->inject,
1325 						dev->vqs[index]->irq_effective_cpu);
1326 		}
1327 		break;
1328 	}
1329 	case VDUSE_IOTLB_REG_UMEM: {
1330 		struct vduse_iova_umem umem;
1331 
1332 		ret = -EFAULT;
1333 		if (copy_from_user(&umem, argp, sizeof(umem)))
1334 			break;
1335 
1336 		ret = -EINVAL;
1337 		if (!is_mem_zero((const char *)umem.reserved,
1338 				 sizeof(umem.reserved)))
1339 			break;
1340 
1341 		mutex_lock(&dev->domain_lock);
1342 		ret = vduse_dev_reg_umem(dev, umem.iova,
1343 					 umem.uaddr, umem.size);
1344 		mutex_unlock(&dev->domain_lock);
1345 		break;
1346 	}
1347 	case VDUSE_IOTLB_DEREG_UMEM: {
1348 		struct vduse_iova_umem umem;
1349 
1350 		ret = -EFAULT;
1351 		if (copy_from_user(&umem, argp, sizeof(umem)))
1352 			break;
1353 
1354 		ret = -EINVAL;
1355 		if (!is_mem_zero((const char *)umem.reserved,
1356 				 sizeof(umem.reserved)))
1357 			break;
1358 		mutex_lock(&dev->domain_lock);
1359 		ret = vduse_dev_dereg_umem(dev, umem.iova,
1360 					   umem.size);
1361 		mutex_unlock(&dev->domain_lock);
1362 		break;
1363 	}
1364 	case VDUSE_IOTLB_GET_INFO: {
1365 		struct vduse_iova_info info;
1366 		struct vhost_iotlb_map *map;
1367 
1368 		ret = -EFAULT;
1369 		if (copy_from_user(&info, argp, sizeof(info)))
1370 			break;
1371 
1372 		ret = -EINVAL;
1373 		if (info.start > info.last)
1374 			break;
1375 
1376 		if (!is_mem_zero((const char *)info.reserved,
1377 				 sizeof(info.reserved)))
1378 			break;
1379 
1380 		mutex_lock(&dev->domain_lock);
1381 		if (!dev->domain) {
1382 			mutex_unlock(&dev->domain_lock);
1383 			break;
1384 		}
1385 		spin_lock(&dev->domain->iotlb_lock);
1386 		map = vhost_iotlb_itree_first(dev->domain->iotlb,
1387 					      info.start, info.last);
1388 		if (map) {
1389 			info.start = map->start;
1390 			info.last = map->last;
1391 			info.capability = 0;
1392 			if (dev->domain->bounce_map && map->start == 0 &&
1393 			    map->last == dev->domain->bounce_size - 1)
1394 				info.capability |= VDUSE_IOVA_CAP_UMEM;
1395 		}
1396 		spin_unlock(&dev->domain->iotlb_lock);
1397 		mutex_unlock(&dev->domain_lock);
1398 		if (!map)
1399 			break;
1400 
1401 		ret = -EFAULT;
1402 		if (copy_to_user(argp, &info, sizeof(info)))
1403 			break;
1404 
1405 		ret = 0;
1406 		break;
1407 	}
1408 	default:
1409 		ret = -ENOIOCTLCMD;
1410 		break;
1411 	}
1412 
1413 	return ret;
1414 }
1415 
1416 static int vduse_dev_release(struct inode *inode, struct file *file)
1417 {
1418 	struct vduse_dev *dev = file->private_data;
1419 
1420 	mutex_lock(&dev->domain_lock);
1421 	if (dev->domain)
1422 		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1423 	mutex_unlock(&dev->domain_lock);
1424 	spin_lock(&dev->msg_lock);
1425 	/* Make sure the inflight messages can be processed after reconnection */
1426 	list_splice_init(&dev->recv_list, &dev->send_list);
1427 	spin_unlock(&dev->msg_lock);
1428 	dev->connected = false;
1429 
1430 	return 0;
1431 }
1432 
1433 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1434 {
1435 	struct vduse_dev *dev;
1436 
1437 	mutex_lock(&vduse_lock);
1438 	dev = idr_find(&vduse_idr, minor);
1439 	mutex_unlock(&vduse_lock);
1440 
1441 	return dev;
1442 }
1443 
1444 static int vduse_dev_open(struct inode *inode, struct file *file)
1445 {
1446 	int ret;
1447 	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1448 
1449 	if (!dev)
1450 		return -ENODEV;
1451 
1452 	ret = -EBUSY;
1453 	mutex_lock(&dev->lock);
1454 	if (dev->connected)
1455 		goto unlock;
1456 
1457 	ret = 0;
1458 	dev->connected = true;
1459 	file->private_data = dev;
1460 unlock:
1461 	mutex_unlock(&dev->lock);
1462 
1463 	return ret;
1464 }
1465 
1466 static const struct file_operations vduse_dev_fops = {
1467 	.owner		= THIS_MODULE,
1468 	.open		= vduse_dev_open,
1469 	.release	= vduse_dev_release,
1470 	.read_iter	= vduse_dev_read_iter,
1471 	.write_iter	= vduse_dev_write_iter,
1472 	.poll		= vduse_dev_poll,
1473 	.unlocked_ioctl	= vduse_dev_ioctl,
1474 	.compat_ioctl	= compat_ptr_ioctl,
1475 	.llseek		= noop_llseek,
1476 };
1477 
1478 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1479 {
1480 	return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1481 }
1482 
1483 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1484 				     const char *buf, size_t count)
1485 {
1486 	cpumask_var_t new_value;
1487 	int ret;
1488 
1489 	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1490 		return -ENOMEM;
1491 
1492 	ret = cpumask_parse(buf, new_value);
1493 	if (ret)
1494 		goto free_mask;
1495 
1496 	ret = -EINVAL;
1497 	if (!cpumask_intersects(new_value, cpu_online_mask))
1498 		goto free_mask;
1499 
1500 	cpumask_copy(&vq->irq_affinity, new_value);
1501 	ret = count;
1502 free_mask:
1503 	free_cpumask_var(new_value);
1504 	return ret;
1505 }
1506 
1507 struct vq_sysfs_entry {
1508 	struct attribute attr;
1509 	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1510 	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1511 			 size_t count);
1512 };
1513 
1514 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1515 
1516 static struct attribute *vq_attrs[] = {
1517 	&irq_cb_affinity_attr.attr,
1518 	NULL,
1519 };
1520 ATTRIBUTE_GROUPS(vq);
1521 
1522 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1523 			    char *buf)
1524 {
1525 	struct vduse_virtqueue *vq = container_of(kobj,
1526 					struct vduse_virtqueue, kobj);
1527 	struct vq_sysfs_entry *entry = container_of(attr,
1528 					struct vq_sysfs_entry, attr);
1529 
1530 	if (!entry->show)
1531 		return -EIO;
1532 
1533 	return entry->show(vq, buf);
1534 }
1535 
1536 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1537 			     const char *buf, size_t count)
1538 {
1539 	struct vduse_virtqueue *vq = container_of(kobj,
1540 					struct vduse_virtqueue, kobj);
1541 	struct vq_sysfs_entry *entry = container_of(attr,
1542 					struct vq_sysfs_entry, attr);
1543 
1544 	if (!entry->store)
1545 		return -EIO;
1546 
1547 	return entry->store(vq, buf, count);
1548 }
1549 
1550 static const struct sysfs_ops vq_sysfs_ops = {
1551 	.show = vq_attr_show,
1552 	.store = vq_attr_store,
1553 };
1554 
1555 static void vq_release(struct kobject *kobj)
1556 {
1557 	struct vduse_virtqueue *vq = container_of(kobj,
1558 					struct vduse_virtqueue, kobj);
1559 	kfree(vq);
1560 }
1561 
1562 static const struct kobj_type vq_type = {
1563 	.release	= vq_release,
1564 	.sysfs_ops	= &vq_sysfs_ops,
1565 	.default_groups	= vq_groups,
1566 };
1567 
1568 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1569 {
1570 	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1571 }
1572 
1573 static const struct class vduse_class = {
1574 	.name = "vduse",
1575 	.devnode = vduse_devnode,
1576 };
1577 
1578 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1579 {
1580 	int i;
1581 
1582 	if (!dev->vqs)
1583 		return;
1584 
1585 	for (i = 0; i < dev->vq_num; i++)
1586 		kobject_put(&dev->vqs[i]->kobj);
1587 	kfree(dev->vqs);
1588 }
1589 
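/*
 * Allocate the virtqueues and expose a vqN sysfs directory (carrying the
 * irq_cb_affinity attribute) for each of them under the VDUSE device.
 */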
1590 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1591 {
1592 	int ret, i;
1593 
1594 	dev->vq_align = vq_align;
1595 	dev->vq_num = vq_num;
1596 	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1597 	if (!dev->vqs)
1598 		return -ENOMEM;
1599 
1600 	for (i = 0; i < vq_num; i++) {
1601 		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1602 		if (!dev->vqs[i]) {
1603 			ret = -ENOMEM;
1604 			goto err;
1605 		}
1606 
1607 		dev->vqs[i]->index = i;
1608 		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1609 		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1610 		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1611 		spin_lock_init(&dev->vqs[i]->kick_lock);
1612 		spin_lock_init(&dev->vqs[i]->irq_lock);
1613 		cpumask_setall(&dev->vqs[i]->irq_affinity);
1614 
1615 		kobject_init(&dev->vqs[i]->kobj, &vq_type);
1616 		ret = kobject_add(&dev->vqs[i]->kobj,
1617 				  &dev->dev->kobj, "vq%d", i);
1618 		if (ret) {
1619 			kfree(dev->vqs[i]);
1620 			goto err;
1621 		}
1622 	}
1623 
1624 	return 0;
1625 err:
1626 	while (i--)
1627 		kobject_put(&dev->vqs[i]->kobj);
1628 	kfree(dev->vqs);
1629 	dev->vqs = NULL;
1630 	return ret;
1631 }
1632 
1633 static struct vduse_dev *vduse_dev_create(void)
1634 {
1635 	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1636 
1637 	if (!dev)
1638 		return NULL;
1639 
1640 	mutex_init(&dev->lock);
1641 	mutex_init(&dev->mem_lock);
1642 	mutex_init(&dev->domain_lock);
1643 	spin_lock_init(&dev->msg_lock);
1644 	INIT_LIST_HEAD(&dev->send_list);
1645 	INIT_LIST_HEAD(&dev->recv_list);
1646 	spin_lock_init(&dev->irq_lock);
1647 	init_rwsem(&dev->rwsem);
1648 
1649 	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1650 	init_waitqueue_head(&dev->waitq);
1651 
1652 	return dev;
1653 }
1654 
1655 static void vduse_dev_destroy(struct vduse_dev *dev)
1656 {
1657 	kfree(dev);
1658 }
1659 
1660 static struct vduse_dev *vduse_find_dev(const char *name)
1661 {
1662 	struct vduse_dev *dev;
1663 	int id;
1664 
1665 	idr_for_each_entry(&vduse_idr, dev, id)
1666 		if (!strcmp(dev->name, name))
1667 			return dev;
1668 
1669 	return NULL;
1670 }
1671 
1672 static int vduse_destroy_dev(char *name)
1673 {
1674 	struct vduse_dev *dev = vduse_find_dev(name);
1675 
1676 	if (!dev)
1677 		return -EINVAL;
1678 
1679 	mutex_lock(&dev->lock);
1680 	if (dev->vdev || dev->connected) {
1681 		mutex_unlock(&dev->lock);
1682 		return -EBUSY;
1683 	}
1684 	dev->connected = true;
1685 	mutex_unlock(&dev->lock);
1686 
1687 	vduse_dev_reset(dev);
1688 	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1689 	idr_remove(&vduse_idr, dev->minor);
1690 	kvfree(dev->config);
1691 	vduse_dev_deinit_vqs(dev);
1692 	if (dev->domain)
1693 		vduse_domain_destroy(dev->domain);
1694 	kfree(dev->name);
1695 	vduse_dev_destroy(dev);
1696 	module_put(THIS_MODULE);
1697 
1698 	return 0;
1699 }
1700 
1701 static bool device_is_allowed(u32 device_id)
1702 {
1703 	int i;
1704 
1705 	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1706 		if (allowed_device_id[i] == device_id)
1707 			return true;
1708 
1709 	return false;
1710 }
1711 
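/*
 * VIRTIO_F_ACCESS_PLATFORM is mandatory.  Features that would require a
 * writable config space (VIRTIO_BLK_F_CONFIG_WCE) or a control virtqueue
 * (VIRTIO_NET_F_CTRL_VQ) are rejected, and net devices must offer
 * VIRTIO_F_VERSION_1.
 */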
1712 static bool features_is_valid(struct vduse_dev_config *config)
1713 {
1714 	if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1715 		return false;
1716 
1717 	/* Now we only support read-only configuration space */
1718 	if ((config->device_id == VIRTIO_ID_BLOCK) &&
1719 			(config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1720 		return false;
1721 	else if ((config->device_id == VIRTIO_ID_NET) &&
1722 			(config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1723 		return false;
1724 
1725 	if ((config->device_id == VIRTIO_ID_NET) &&
1726 			!(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1727 		return false;
1728 
1729 	return true;
1730 }
1731 
1732 static bool vduse_validate_config(struct vduse_dev_config *config)
1733 {
1734 	if (!is_mem_zero((const char *)config->reserved,
1735 			 sizeof(config->reserved)))
1736 		return false;
1737 
1738 	if (config->vq_align > PAGE_SIZE)
1739 		return false;
1740 
1741 	if (config->config_size > PAGE_SIZE)
1742 		return false;
1743 
1744 	if (config->vq_num > 0xffff)
1745 		return false;
1746 
1747 	if (!config->name[0])
1748 		return false;
1749 
1750 	if (!device_is_allowed(config->device_id))
1751 		return false;
1752 
1753 	if (!features_is_valid(config))
1754 		return false;
1755 
1756 	return true;
1757 }
1758 
1759 static ssize_t msg_timeout_show(struct device *device,
1760 				struct device_attribute *attr, char *buf)
1761 {
1762 	struct vduse_dev *dev = dev_get_drvdata(device);
1763 
1764 	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1765 }
1766 
1767 static ssize_t msg_timeout_store(struct device *device,
1768 				 struct device_attribute *attr,
1769 				 const char *buf, size_t count)
1770 {
1771 	struct vduse_dev *dev = dev_get_drvdata(device);
1772 	int ret;
1773 
1774 	ret = kstrtouint(buf, 10, &dev->msg_timeout);
1775 	if (ret < 0)
1776 		return ret;
1777 
1778 	return count;
1779 }
1780 
1781 static DEVICE_ATTR_RW(msg_timeout);
1782 
1783 static ssize_t bounce_size_show(struct device *device,
1784 				struct device_attribute *attr, char *buf)
1785 {
1786 	struct vduse_dev *dev = dev_get_drvdata(device);
1787 
1788 	return sysfs_emit(buf, "%u\n", dev->bounce_size);
1789 }
1790 
1791 static ssize_t bounce_size_store(struct device *device,
1792 				 struct device_attribute *attr,
1793 				 const char *buf, size_t count)
1794 {
1795 	struct vduse_dev *dev = dev_get_drvdata(device);
1796 	unsigned int bounce_size;
1797 	int ret;
1798 
1799 	ret = -EPERM;
1800 	mutex_lock(&dev->domain_lock);
1801 	if (dev->domain)
1802 		goto unlock;
1803 
1804 	ret = kstrtouint(buf, 10, &bounce_size);
1805 	if (ret < 0)
1806 		goto unlock;
1807 
1808 	ret = -EINVAL;
1809 	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1810 	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1811 		goto unlock;
1812 
1813 	dev->bounce_size = bounce_size & PAGE_MASK;
1814 	ret = count;
1815 unlock:
1816 	mutex_unlock(&dev->domain_lock);
1817 	return ret;
1818 }
1819 
1820 static DEVICE_ATTR_RW(bounce_size);
1821 
1822 static struct attribute *vduse_dev_attrs[] = {
1823 	&dev_attr_msg_timeout.attr,
1824 	&dev_attr_bounce_size.attr,
1825 	NULL
1826 };
1827 
1828 ATTRIBUTE_GROUPS(vduse_dev);
1829 
1830 static int vduse_create_dev(struct vduse_dev_config *config,
1831 			    void *config_buf, u64 api_version)
1832 {
1833 	int ret;
1834 	struct vduse_dev *dev;
1835 
1836 	ret = -EPERM;
1837 	if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
1838 		goto err;
1839 
1840 	ret = -EEXIST;
1841 	if (vduse_find_dev(config->name))
1842 		goto err;
1843 
1844 	ret = -ENOMEM;
1845 	dev = vduse_dev_create();
1846 	if (!dev)
1847 		goto err;
1848 
1849 	dev->api_version = api_version;
1850 	dev->device_features = config->features;
1851 	dev->device_id = config->device_id;
1852 	dev->vendor_id = config->vendor_id;
1853 	dev->name = kstrdup(config->name, GFP_KERNEL);
1854 	if (!dev->name)
1855 		goto err_str;
1856 
1857 	dev->bounce_size = VDUSE_BOUNCE_SIZE;
1858 	dev->config = config_buf;
1859 	dev->config_size = config->config_size;
1860 
1861 	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1862 	if (ret < 0)
1863 		goto err_idr;
1864 
1865 	dev->minor = ret;
1866 	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1867 	dev->dev = device_create_with_groups(&vduse_class, NULL,
1868 				MKDEV(MAJOR(vduse_major), dev->minor),
1869 				dev, vduse_dev_groups, "%s", config->name);
1870 	if (IS_ERR(dev->dev)) {
1871 		ret = PTR_ERR(dev->dev);
1872 		goto err_dev;
1873 	}
1874 
1875 	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1876 	if (ret)
1877 		goto err_vqs;
1878 
1879 	__module_get(THIS_MODULE);
1880 
1881 	return 0;
1882 err_vqs:
1883 	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1884 err_dev:
1885 	idr_remove(&vduse_idr, dev->minor);
1886 err_idr:
1887 	kfree(dev->name);
1888 err_str:
1889 	vduse_dev_destroy(dev);
1890 err:
1891 	return ret;
1892 }
1893 
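/*
 * ioctl handler for /dev/vduse/control: negotiate the API version and
 * create or destroy VDUSE devices.
 */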
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

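/* Release handler for /dev/vduse/control: drop the per-open control state. */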
static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

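/*
 * Open handler for /dev/vduse/control: allocate per-open state and default
 * to the latest supported API version until userspace negotiates one.
 */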
static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

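/*
 * File operations for the /dev/vduse/control node. A userspace daemon is
 * expected to drive it roughly as in the hypothetical sketch below (error
 * handling omitted; the ABI is defined in include/uapi/linux/vduse.h):
 *
 *	int fd = open("/dev/vduse/control", O_RDWR);
 *	uint64_t version = VDUSE_API_VERSION;
 *
 *	ioctl(fd, VDUSE_SET_API_VERSION, &version);
 *	ioctl(fd, VDUSE_CREATE_DEV, &dev_config);
 *	...
 *	ioctl(fd, VDUSE_DESTROY_DEV, dev_config.name);
 *
 * where dev_config is a struct vduse_dev_config followed by config_size
 * bytes of virtio config space.
 */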
static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

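/*
 * Allocate the vDPA device that fronts a VDUSE device and wire up its DMA
 * mask and the VDUSE-specific DMA ops before it is registered on the vDPA bus.
 */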
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

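/*
 * vdpa management op: attach an already-created and ready VDUSE device to
 * the vDPA bus, creating its IOVA domain on first use.
 */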
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	if (!dev->domain)
		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
						  dev->bounce_size);
	mutex_unlock(&dev->domain_lock);
	if (!dev->domain) {
		put_device(&dev->vdev->vdpa.dev);
		return -ENOMEM;
	}

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		mutex_lock(&dev->domain_lock);
		vduse_domain_destroy(dev->domain);
		dev->domain = NULL;
		mutex_unlock(&dev->domain_lock);
		return ret;
	}

	return 0;
}

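/* vdpa management op: detach a VDUSE device from the vDPA bus. */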
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

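/* Register the "vduse" management device with the vDPA core. */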
static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

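/*
 * Module init: set up the "vduse" class, the control and per-device
 * character devices, the interrupt injection workqueues, the IOVA domain
 * layer and the vDPA management device.
 */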
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);

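/* Module exit: tear everything down in the reverse order of vduse_init(). */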
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);