1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VDUSE: vDPA Device in Userspace
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11 #include "linux/virtio_net.h"
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34
35 #include "iova_domain.h"
36
37 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
38 #define DRV_DESC "vDPA Device in Userspace"
39 #define DRV_LICENSE "GPL v2"
40
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
48
49 #define IRQ_UNBOUND -1
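/*
 * Rough sketch of the IOVA layout implied by the constants above (the
 * exact carving is done by the IOVA domain code): [0, bounce_size)
 * backs bounce-buffer mappings for streaming DMA, and the rest of the
 * space up to VDUSE_IOVA_SIZE (at least the 128 MB reserved above)
 * serves coherent allocations such as virtqueue rings.  bounce_size
 * defaults to VDUSE_BOUNCE_SIZE and can be tuned via sysfs within
 * [VDUSE_MIN_BOUNCE_SIZE, VDUSE_MAX_BOUNCE_SIZE] before the domain is
 * created.
 */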
50
51 struct vduse_virtqueue {
52 u16 index;
53 u16 num_max;
54 u32 num;
55 u64 desc_addr;
56 u64 driver_addr;
57 u64 device_addr;
58 struct vdpa_vq_state state;
59 bool ready;
60 bool kicked;
61 spinlock_t kick_lock;
62 spinlock_t irq_lock;
63 struct eventfd_ctx *kickfd;
64 struct vdpa_callback cb;
65 struct work_struct inject;
66 struct work_struct kick;
67 int irq_effective_cpu;
68 struct cpumask irq_affinity;
69 struct kobject kobj;
70 };
71
72 struct vduse_dev;
73
74 struct vduse_vdpa {
75 struct vdpa_device vdpa;
76 struct vduse_dev *dev;
77 };
78
79 struct vduse_umem {
80 unsigned long iova;
81 unsigned long npages;
82 struct page **pages;
83 struct mm_struct *mm;
84 };
85
86 struct vduse_dev {
87 struct vduse_vdpa *vdev;
88 struct device *dev;
89 struct vduse_virtqueue **vqs;
90 struct vduse_iova_domain *domain;
91 char *name;
92 struct mutex lock;
93 spinlock_t msg_lock;
94 u64 msg_unique;
95 u32 msg_timeout;
96 wait_queue_head_t waitq;
97 struct list_head send_list;
98 struct list_head recv_list;
99 struct vdpa_callback config_cb;
100 struct work_struct inject;
101 spinlock_t irq_lock;
102 struct rw_semaphore rwsem;
103 int minor;
104 bool broken;
105 bool connected;
106 u64 api_version;
107 u64 device_features;
108 u64 driver_features;
109 u32 device_id;
110 u32 vendor_id;
111 u32 generation;
112 u32 config_size;
113 void *config;
114 u8 status;
115 u32 vq_num;
116 u32 vq_align;
117 struct vduse_umem *umem;
118 struct mutex mem_lock;
119 unsigned int bounce_size;
120 struct mutex domain_lock;
121 };
122
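/*
 * A control message exchanged with the userspace daemon: the kernel
 * queues the request on dev->send_list, the daemon fetches it with
 * read(2) on the device fd and answers by writing a vduse_dev_response
 * back, which fills @resp, sets @completed and wakes the waiter in
 * vduse_dev_msg_sync().
 */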
123 struct vduse_dev_msg {
124 struct vduse_dev_request req;
125 struct vduse_dev_response resp;
126 struct list_head list;
127 wait_queue_head_t waitq;
128 bool completed;
129 };
130
131 struct vduse_control {
132 u64 api_version;
133 };
134
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
137
138 static dev_t vduse_major;
139 static struct cdev vduse_ctrl_cdev;
140 static struct cdev vduse_cdev;
141 static struct workqueue_struct *vduse_irq_wq;
142 static struct workqueue_struct *vduse_irq_bound_wq;
143
144 static u32 allowed_device_id[] = {
145 VIRTIO_ID_BLOCK,
146 VIRTIO_ID_NET,
147 VIRTIO_ID_FS,
148 };
149
150 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
151 {
152 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
153
154 return vdev->dev;
155 }
156
157 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
158 {
159 struct vdpa_device *vdpa = dev_to_vdpa(dev);
160
161 return vdpa_to_vduse(vdpa);
162 }
163
164 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
165 uint32_t request_id)
166 {
167 struct vduse_dev_msg *msg;
168
169 list_for_each_entry(msg, head, list) {
170 if (msg->req.request_id == request_id) {
171 list_del(&msg->list);
172 return msg;
173 }
174 }
175
176 return NULL;
177 }
178
179 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
180 {
181 struct vduse_dev_msg *msg = NULL;
182
183 if (!list_empty(head)) {
184 msg = list_first_entry(head, struct vduse_dev_msg, list);
185 list_del(&msg->list);
186 }
187
188 return msg;
189 }
190
191 static void vduse_enqueue_msg(struct list_head *head,
192 struct vduse_dev_msg *msg)
193 {
194 list_add_tail(&msg->list, head);
195 }
196
197 static void vduse_dev_broken(struct vduse_dev *dev)
198 {
199 struct vduse_dev_msg *msg, *tmp;
200
201 if (unlikely(dev->broken))
202 return;
203
204 list_splice_init(&dev->recv_list, &dev->send_list);
205 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
206 list_del(&msg->list);
207 msg->completed = 1;
208 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
209 wake_up(&msg->waitq);
210 }
211 dev->broken = true;
212 wake_up(&dev->waitq);
213 }
214
215 static int vduse_dev_msg_sync(struct vduse_dev *dev,
216 struct vduse_dev_msg *msg)
217 {
218 int ret;
219
220 if (unlikely(dev->broken))
221 return -EIO;
222
223 init_waitqueue_head(&msg->waitq);
224 spin_lock(&dev->msg_lock);
225 if (unlikely(dev->broken)) {
226 spin_unlock(&dev->msg_lock);
227 return -EIO;
228 }
229 msg->req.request_id = dev->msg_unique++;
230 vduse_enqueue_msg(&dev->send_list, msg);
231 wake_up(&dev->waitq);
232 spin_unlock(&dev->msg_lock);
233 if (dev->msg_timeout)
234 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
235 (long)dev->msg_timeout * HZ);
236 else
237 ret = wait_event_killable(msg->waitq, msg->completed);
238
239 spin_lock(&dev->msg_lock);
240 if (!msg->completed) {
241 list_del(&msg->list);
242 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
243 /* Mark the device as malfunctioning when there is a timeout */
244 if (!ret)
245 vduse_dev_broken(dev);
246 }
247 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
248 spin_unlock(&dev->msg_lock);
249
250 return ret;
251 }
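/*
 * A minimal sketch of the userspace side of this handshake
 * (illustrative only, not part of the driver; assumes a daemon that
 * has opened /dev/vduse/$DEVICE as dev_fd):
 *
 *	struct vduse_dev_request req;
 *	struct vduse_dev_response resp = { 0 };
 *
 *	read(dev_fd, &req, sizeof(req));
 *	resp.request_id = req.request_id;
 *	resp.result = VDUSE_REQ_RESULT_OK;
 *	write(dev_fd, &resp, sizeof(resp));
 *
 * For VDUSE_GET_VQ_STATE requests the daemon also fills resp.vq_state
 * before writing the response back.
 */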
252
253 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
254 struct vduse_virtqueue *vq,
255 struct vdpa_vq_state_packed *packed)
256 {
257 struct vduse_dev_msg msg = { 0 };
258 int ret;
259
260 msg.req.type = VDUSE_GET_VQ_STATE;
261 msg.req.vq_state.index = vq->index;
262
263 ret = vduse_dev_msg_sync(dev, &msg);
264 if (ret)
265 return ret;
266
267 packed->last_avail_counter =
268 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
269 packed->last_avail_idx =
270 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
271 packed->last_used_counter =
272 msg.resp.vq_state.packed.last_used_counter & 0x0001;
273 packed->last_used_idx =
274 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
275
276 return 0;
277 }
278
279 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
280 struct vduse_virtqueue *vq,
281 struct vdpa_vq_state_split *split)
282 {
283 struct vduse_dev_msg msg = { 0 };
284 int ret;
285
286 msg.req.type = VDUSE_GET_VQ_STATE;
287 msg.req.vq_state.index = vq->index;
288
289 ret = vduse_dev_msg_sync(dev, &msg);
290 if (ret)
291 return ret;
292
293 split->avail_index = msg.resp.vq_state.split.avail_index;
294
295 return 0;
296 }
297
298 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
299 {
300 struct vduse_dev_msg msg = { 0 };
301
302 msg.req.type = VDUSE_SET_STATUS;
303 msg.req.s.status = status;
304
305 return vduse_dev_msg_sync(dev, &msg);
306 }
307
308 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
309 u64 start, u64 last)
310 {
311 struct vduse_dev_msg msg = { 0 };
312
313 if (last < start)
314 return -EINVAL;
315
316 msg.req.type = VDUSE_UPDATE_IOTLB;
317 msg.req.iova.start = start;
318 msg.req.iova.last = last;
319
320 return vduse_dev_msg_sync(dev, &msg);
321 }
322
323 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
324 {
325 struct file *file = iocb->ki_filp;
326 struct vduse_dev *dev = file->private_data;
327 struct vduse_dev_msg *msg;
328 int size = sizeof(struct vduse_dev_request);
329 ssize_t ret;
330
331 if (iov_iter_count(to) < size)
332 return -EINVAL;
333
334 spin_lock(&dev->msg_lock);
335 while (1) {
336 msg = vduse_dequeue_msg(&dev->send_list);
337 if (msg)
338 break;
339
340 ret = -EAGAIN;
341 if (file->f_flags & O_NONBLOCK)
342 goto unlock;
343
344 spin_unlock(&dev->msg_lock);
345 ret = wait_event_interruptible_exclusive(dev->waitq,
346 !list_empty(&dev->send_list));
347 if (ret)
348 return ret;
349
350 spin_lock(&dev->msg_lock);
351 }
352 spin_unlock(&dev->msg_lock);
353 ret = copy_to_iter(&msg->req, size, to);
354 spin_lock(&dev->msg_lock);
355 if (ret != size) {
356 ret = -EFAULT;
357 vduse_enqueue_msg(&dev->send_list, msg);
358 goto unlock;
359 }
360 vduse_enqueue_msg(&dev->recv_list, msg);
361 unlock:
362 spin_unlock(&dev->msg_lock);
363
364 return ret;
365 }
366
367 static bool is_mem_zero(const char *ptr, int size)
368 {
369 int i;
370
371 for (i = 0; i < size; i++) {
372 if (ptr[i])
373 return false;
374 }
375 return true;
376 }
377
378 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
379 {
380 struct file *file = iocb->ki_filp;
381 struct vduse_dev *dev = file->private_data;
382 struct vduse_dev_response resp;
383 struct vduse_dev_msg *msg;
384 size_t ret;
385
386 ret = copy_from_iter(&resp, sizeof(resp), from);
387 if (ret != sizeof(resp))
388 return -EINVAL;
389
390 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
391 return -EINVAL;
392
393 spin_lock(&dev->msg_lock);
394 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
395 if (!msg) {
396 ret = -ENOENT;
397 goto unlock;
398 }
399
400 memcpy(&msg->resp, &resp, sizeof(resp));
401 msg->completed = 1;
402 wake_up(&msg->waitq);
403 unlock:
404 spin_unlock(&dev->msg_lock);
405
406 return ret;
407 }
408
409 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
410 {
411 struct vduse_dev *dev = file->private_data;
412 __poll_t mask = 0;
413
414 poll_wait(file, &dev->waitq, wait);
415
416 spin_lock(&dev->msg_lock);
417
418 if (unlikely(dev->broken))
419 mask |= EPOLLERR;
420 if (!list_empty(&dev->send_list))
421 mask |= EPOLLIN | EPOLLRDNORM;
422 if (!list_empty(&dev->recv_list))
423 mask |= EPOLLOUT | EPOLLWRNORM;
424
425 spin_unlock(&dev->msg_lock);
426
427 return mask;
428 }
429
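/*
 * Reset all device state visible through the vDPA ops.  Holding
 * dev->rwsem for write here pairs with the read lock taken in
 * vduse_dev_queue_irq_work(), so no config or virtqueue interrupt can
 * be injected while the reset is in progress.
 */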
430 static void vduse_dev_reset(struct vduse_dev *dev)
431 {
432 int i;
433 struct vduse_iova_domain *domain = dev->domain;
434
435 /* The coherent mappings are handled in vduse_dev_free_coherent() */
436 if (domain && domain->bounce_map)
437 vduse_domain_reset_bounce_map(domain);
438
439 down_write(&dev->rwsem);
440
441 dev->status = 0;
442 dev->driver_features = 0;
443 dev->generation++;
444 spin_lock(&dev->irq_lock);
445 dev->config_cb.callback = NULL;
446 dev->config_cb.private = NULL;
447 spin_unlock(&dev->irq_lock);
448 flush_work(&dev->inject);
449
450 for (i = 0; i < dev->vq_num; i++) {
451 struct vduse_virtqueue *vq = dev->vqs[i];
452
453 vq->ready = false;
454 vq->desc_addr = 0;
455 vq->driver_addr = 0;
456 vq->device_addr = 0;
457 vq->num = 0;
458 memset(&vq->state, 0, sizeof(vq->state));
459
460 spin_lock(&vq->kick_lock);
461 vq->kicked = false;
462 if (vq->kickfd)
463 eventfd_ctx_put(vq->kickfd);
464 vq->kickfd = NULL;
465 spin_unlock(&vq->kick_lock);
466
467 spin_lock(&vq->irq_lock);
468 vq->cb.callback = NULL;
469 vq->cb.private = NULL;
470 vq->cb.trigger = NULL;
471 spin_unlock(&vq->irq_lock);
472 flush_work(&vq->inject);
473 flush_work(&vq->kick);
474 }
475
476 up_write(&dev->rwsem);
477 }
478
479 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
480 u64 desc_area, u64 driver_area,
481 u64 device_area)
482 {
483 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
484 struct vduse_virtqueue *vq = dev->vqs[idx];
485
486 vq->desc_addr = desc_area;
487 vq->driver_addr = driver_area;
488 vq->device_addr = device_area;
489
490 return 0;
491 }
492
493 static void vduse_vq_kick(struct vduse_virtqueue *vq)
494 {
495 spin_lock(&vq->kick_lock);
496 if (!vq->ready)
497 goto unlock;
498
499 if (vq->kickfd)
500 eventfd_signal(vq->kickfd);
501 else
502 vq->kicked = true;
503 unlock:
504 spin_unlock(&vq->kick_lock);
505 }
506
507 static void vduse_vq_kick_work(struct work_struct *work)
508 {
509 struct vduse_virtqueue *vq = container_of(work,
510 struct vduse_virtqueue, kick);
511
512 vduse_vq_kick(vq);
513 }
514
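/*
 * kick_vq may be called in a context where eventfd_signal_allowed()
 * returns false (i.e. signalling another eventfd could recurse); in
 * that case the kick is deferred to a workqueue and the eventfd is
 * signalled from process context instead.
 */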
515 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
516 {
517 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
518 struct vduse_virtqueue *vq = dev->vqs[idx];
519
520 if (!eventfd_signal_allowed()) {
521 schedule_work(&vq->kick);
522 return;
523 }
524 vduse_vq_kick(vq);
525 }
526
527 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
528 struct vdpa_callback *cb)
529 {
530 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
531 struct vduse_virtqueue *vq = dev->vqs[idx];
532
533 spin_lock(&vq->irq_lock);
534 vq->cb.callback = cb->callback;
535 vq->cb.private = cb->private;
536 vq->cb.trigger = cb->trigger;
537 spin_unlock(&vq->irq_lock);
538 }
539
540 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
541 {
542 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
543 struct vduse_virtqueue *vq = dev->vqs[idx];
544
545 vq->num = num;
546 }
547
548 static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
549 {
550 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
551 struct vduse_virtqueue *vq = dev->vqs[idx];
552
553 if (vq->num)
554 return vq->num;
555 else
556 return vq->num_max;
557 }
558
559 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
560 u16 idx, bool ready)
561 {
562 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
563 struct vduse_virtqueue *vq = dev->vqs[idx];
564
565 vq->ready = ready;
566 }
567
568 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
569 {
570 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
571 struct vduse_virtqueue *vq = dev->vqs[idx];
572
573 return vq->ready;
574 }
575
576 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
577 const struct vdpa_vq_state *state)
578 {
579 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
580 struct vduse_virtqueue *vq = dev->vqs[idx];
581
582 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
583 vq->state.packed.last_avail_counter =
584 state->packed.last_avail_counter;
585 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
586 vq->state.packed.last_used_counter =
587 state->packed.last_used_counter;
588 vq->state.packed.last_used_idx = state->packed.last_used_idx;
589 } else
590 vq->state.split.avail_index = state->split.avail_index;
591
592 return 0;
593 }
594
595 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
596 struct vdpa_vq_state *state)
597 {
598 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
599 struct vduse_virtqueue *vq = dev->vqs[idx];
600
601 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
602 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
603
604 return vduse_dev_get_vq_state_split(dev, vq, &state->split);
605 }
606
607 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
608 {
609 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
610
611 return dev->vq_align;
612 }
613
614 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
615 {
616 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
617
618 return dev->device_features;
619 }
620
621 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
622 {
623 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
624
625 dev->driver_features = features;
626 return 0;
627 }
628
629 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
630 {
631 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
632
633 return dev->driver_features;
634 }
635
636 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
637 struct vdpa_callback *cb)
638 {
639 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
640
641 spin_lock(&dev->irq_lock);
642 dev->config_cb.callback = cb->callback;
643 dev->config_cb.private = cb->private;
644 spin_unlock(&dev->irq_lock);
645 }
646
647 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
648 {
649 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
650 u16 num_max = 0;
651 int i;
652
653 for (i = 0; i < dev->vq_num; i++)
654 if (num_max < dev->vqs[i]->num_max)
655 num_max = dev->vqs[i]->num_max;
656
657 return num_max;
658 }
659
660 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
661 {
662 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
663
664 return dev->device_id;
665 }
666
667 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
668 {
669 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
670
671 return dev->vendor_id;
672 }
673
674 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
675 {
676 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
677
678 return dev->status;
679 }
680
681 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
682 {
683 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
684
685 if (vduse_dev_set_status(dev, status))
686 return;
687
688 dev->status = status;
689 }
690
691 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
692 {
693 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
694
695 return dev->config_size;
696 }
697
698 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
699 void *buf, unsigned int len)
700 {
701 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
702
703 /* Initialize the buffer in case of partial copy. */
704 memset(buf, 0, len);
705
706 if (offset > dev->config_size)
707 return;
708
709 if (len > dev->config_size - offset)
710 len = dev->config_size - offset;
711
712 memcpy(buf, dev->config + offset, len);
713 }
714
715 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
716 const void *buf, unsigned int len)
717 {
718 /* Now we only support read-only configuration space */
719 }
720
721 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
722 {
723 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
724 int ret = vduse_dev_set_status(dev, 0);
725
726 vduse_dev_reset(dev);
727
728 return ret;
729 }
730
731 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
732 {
733 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
734
735 return dev->generation;
736 }
737
738 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
739 const struct cpumask *cpu_mask)
740 {
741 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
742
743 if (cpu_mask)
744 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
745 else
746 cpumask_setall(&dev->vqs[idx]->irq_affinity);
747
748 return 0;
749 }
750
751 static const struct cpumask *
752 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
753 {
754 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
755
756 return &dev->vqs[idx]->irq_affinity;
757 }
758
759 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
760 unsigned int asid,
761 struct vhost_iotlb *iotlb)
762 {
763 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
764 int ret;
765
766 ret = vduse_domain_set_map(dev->domain, iotlb);
767 if (ret)
768 return ret;
769
770 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
771 if (ret) {
772 vduse_domain_clear_map(dev->domain, iotlb);
773 return ret;
774 }
775
776 return 0;
777 }
778
779 static void vduse_vdpa_free(struct vdpa_device *vdpa)
780 {
781 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
782
783 dev->vdev = NULL;
784 }
785
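/*
 * Most of the ops below are served from state cached in struct
 * vduse_dev; only set_status, reset, get_vq_state and set_map need a
 * synchronous round trip to the userspace daemon via
 * vduse_dev_msg_sync().
 */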
786 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
787 .set_vq_address = vduse_vdpa_set_vq_address,
788 .kick_vq = vduse_vdpa_kick_vq,
789 .set_vq_cb = vduse_vdpa_set_vq_cb,
790 .set_vq_num = vduse_vdpa_set_vq_num,
791 .get_vq_size = vduse_vdpa_get_vq_size,
792 .set_vq_ready = vduse_vdpa_set_vq_ready,
793 .get_vq_ready = vduse_vdpa_get_vq_ready,
794 .set_vq_state = vduse_vdpa_set_vq_state,
795 .get_vq_state = vduse_vdpa_get_vq_state,
796 .get_vq_align = vduse_vdpa_get_vq_align,
797 .get_device_features = vduse_vdpa_get_device_features,
798 .set_driver_features = vduse_vdpa_set_driver_features,
799 .get_driver_features = vduse_vdpa_get_driver_features,
800 .set_config_cb = vduse_vdpa_set_config_cb,
801 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
802 .get_device_id = vduse_vdpa_get_device_id,
803 .get_vendor_id = vduse_vdpa_get_vendor_id,
804 .get_status = vduse_vdpa_get_status,
805 .set_status = vduse_vdpa_set_status,
806 .get_config_size = vduse_vdpa_get_config_size,
807 .get_config = vduse_vdpa_get_config,
808 .set_config = vduse_vdpa_set_config,
809 .get_generation = vduse_vdpa_get_generation,
810 .set_vq_affinity = vduse_vdpa_set_vq_affinity,
811 .get_vq_affinity = vduse_vdpa_get_vq_affinity,
812 .reset = vduse_vdpa_reset,
813 .set_map = vduse_vdpa_set_map,
814 .free = vduse_vdpa_free,
815 };
816
817 static void vduse_dev_sync_single_for_device(struct device *dev,
818 dma_addr_t dma_addr, size_t size,
819 enum dma_data_direction dir)
820 {
821 struct vduse_dev *vdev = dev_to_vduse(dev);
822 struct vduse_iova_domain *domain = vdev->domain;
823
824 vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
825 }
826
827 static void vduse_dev_sync_single_for_cpu(struct device *dev,
828 dma_addr_t dma_addr, size_t size,
829 enum dma_data_direction dir)
830 {
831 struct vduse_dev *vdev = dev_to_vduse(dev);
832 struct vduse_iova_domain *domain = vdev->domain;
833
834 vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
835 }
836
837 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
838 unsigned long offset, size_t size,
839 enum dma_data_direction dir,
840 unsigned long attrs)
841 {
842 struct vduse_dev *vdev = dev_to_vduse(dev);
843 struct vduse_iova_domain *domain = vdev->domain;
844
845 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
846 }
847
848 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
849 size_t size, enum dma_data_direction dir,
850 unsigned long attrs)
851 {
852 struct vduse_dev *vdev = dev_to_vduse(dev);
853 struct vduse_iova_domain *domain = vdev->domain;
854
855 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
856 }
857
858 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
859 dma_addr_t *dma_addr, gfp_t flag,
860 unsigned long attrs)
861 {
862 struct vduse_dev *vdev = dev_to_vduse(dev);
863 struct vduse_iova_domain *domain = vdev->domain;
864 unsigned long iova;
865 void *addr;
866
867 *dma_addr = DMA_MAPPING_ERROR;
868 addr = vduse_domain_alloc_coherent(domain, size,
869 (dma_addr_t *)&iova, flag, attrs);
870 if (!addr)
871 return NULL;
872
873 *dma_addr = (dma_addr_t)iova;
874
875 return addr;
876 }
877
878 static void vduse_dev_free_coherent(struct device *dev, size_t size,
879 void *vaddr, dma_addr_t dma_addr,
880 unsigned long attrs)
881 {
882 struct vduse_dev *vdev = dev_to_vduse(dev);
883 struct vduse_iova_domain *domain = vdev->domain;
884
885 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
886 }
887
888 static size_t vduse_dev_max_mapping_size(struct device *dev)
889 {
890 struct vduse_dev *vdev = dev_to_vduse(dev);
891 struct vduse_iova_domain *domain = vdev->domain;
892
893 return domain->bounce_size;
894 }
895
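/*
 * The dma_map_ops below are what make a VDUSE device usable by the
 * in-kernel virtio-vdpa transport: stream mappings are redirected into
 * the bounce buffer of the IOVA domain, coherent allocations (such as
 * virtqueue rings) come from the non-bounce part of the IOVA space, and
 * the userspace daemon gains access to both through the file returned
 * by the VDUSE_IOTLB_GET_FD ioctl.
 */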
896 static const struct dma_map_ops vduse_dev_dma_ops = {
897 .sync_single_for_device = vduse_dev_sync_single_for_device,
898 .sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
899 .map_page = vduse_dev_map_page,
900 .unmap_page = vduse_dev_unmap_page,
901 .alloc = vduse_dev_alloc_coherent,
902 .free = vduse_dev_free_coherent,
903 .max_mapping_size = vduse_dev_max_mapping_size,
904 };
905
906 static unsigned int perm_to_file_flags(u8 perm)
907 {
908 unsigned int flags = 0;
909
910 switch (perm) {
911 case VDUSE_ACCESS_WO:
912 flags |= O_WRONLY;
913 break;
914 case VDUSE_ACCESS_RO:
915 flags |= O_RDONLY;
916 break;
917 case VDUSE_ACCESS_RW:
918 flags |= O_RDWR;
919 break;
920 default:
921 WARN(1, "invalidate vhost IOTLB permission\n");
922 break;
923 }
924
925 return flags;
926 }
927
928 static int vduse_kickfd_setup(struct vduse_dev *dev,
929 struct vduse_vq_eventfd *eventfd)
930 {
931 struct eventfd_ctx *ctx = NULL;
932 struct vduse_virtqueue *vq;
933 u32 index;
934
935 if (eventfd->index >= dev->vq_num)
936 return -EINVAL;
937
938 index = array_index_nospec(eventfd->index, dev->vq_num);
939 vq = dev->vqs[index];
940 if (eventfd->fd >= 0) {
941 ctx = eventfd_ctx_fdget(eventfd->fd);
942 if (IS_ERR(ctx))
943 return PTR_ERR(ctx);
944 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
945 return 0;
946
947 spin_lock(&vq->kick_lock);
948 if (vq->kickfd)
949 eventfd_ctx_put(vq->kickfd);
950 vq->kickfd = ctx;
951 if (vq->ready && vq->kicked && vq->kickfd) {
952 eventfd_signal(vq->kickfd);
953 vq->kicked = false;
954 }
955 spin_unlock(&vq->kick_lock);
956
957 return 0;
958 }
959
960 static bool vduse_dev_is_ready(struct vduse_dev *dev)
961 {
962 int i;
963
964 for (i = 0; i < dev->vq_num; i++)
965 if (!dev->vqs[i]->num_max)
966 return false;
967
968 return true;
969 }
970
971 static void vduse_dev_irq_inject(struct work_struct *work)
972 {
973 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
974
975 spin_lock_bh(&dev->irq_lock);
976 if (dev->config_cb.callback)
977 dev->config_cb.callback(dev->config_cb.private);
978 spin_unlock_bh(&dev->irq_lock);
979 }
980
981 static void vduse_vq_irq_inject(struct work_struct *work)
982 {
983 struct vduse_virtqueue *vq = container_of(work,
984 struct vduse_virtqueue, inject);
985
986 spin_lock_bh(&vq->irq_lock);
987 if (vq->ready && vq->cb.callback)
988 vq->cb.callback(vq->cb.private);
989 spin_unlock_bh(&vq->irq_lock);
990 }
991
992 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
993 {
994 bool signal = false;
995
996 if (!vq->cb.trigger)
997 return false;
998
999 spin_lock_irq(&vq->irq_lock);
1000 if (vq->ready && vq->cb.trigger) {
1001 eventfd_signal(vq->cb.trigger);
1002 signal = true;
1003 }
1004 spin_unlock_irq(&vq->irq_lock);
1005
1006 return signal;
1007 }
1008
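/*
 * Virtqueue interrupts reach the driver in one of two ways: directly
 * through the irqfd registered in vq->cb.trigger (fast path above), or
 * by scheduling vq->inject on a workqueue, which then invokes the vDPA
 * callback (see vduse_dev_queue_irq_work() below).
 */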
1009 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1010 struct work_struct *irq_work,
1011 int irq_effective_cpu)
1012 {
1013 int ret = -EINVAL;
1014
1015 down_read(&dev->rwsem);
1016 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1017 goto unlock;
1018
1019 ret = 0;
1020 if (irq_effective_cpu == IRQ_UNBOUND)
1021 queue_work(vduse_irq_wq, irq_work);
1022 else
1023 queue_work_on(irq_effective_cpu,
1024 vduse_irq_bound_wq, irq_work);
1025 unlock:
1026 up_read(&dev->rwsem);
1027
1028 return ret;
1029 }
1030
1031 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
1032 u64 iova, u64 size)
1033 {
1034 int ret;
1035
1036 mutex_lock(&dev->mem_lock);
1037 ret = -ENOENT;
1038 if (!dev->umem)
1039 goto unlock;
1040
1041 ret = -EINVAL;
1042 if (!dev->domain)
1043 goto unlock;
1044
1045 if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1046 goto unlock;
1047
1048 vduse_domain_remove_user_bounce_pages(dev->domain);
1049 unpin_user_pages_dirty_lock(dev->umem->pages,
1050 dev->umem->npages, true);
1051 atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1052 mmdrop(dev->umem->mm);
1053 vfree(dev->umem->pages);
1054 kfree(dev->umem);
1055 dev->umem = NULL;
1056 ret = 0;
1057 unlock:
1058 mutex_unlock(&dev->mem_lock);
1059 return ret;
1060 }
1061
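/*
 * VDUSE_IOTLB_REG_UMEM lets the daemon supply its own memory for the
 * bounce buffer: the pages are pinned, charged against RLIMIT_MEMLOCK
 * and handed to the IOVA domain, replacing the kernel-allocated bounce
 * pages so bounced data ends up directly in the daemon's buffer.
 */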
1062 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1063 u64 iova, u64 uaddr, u64 size)
1064 {
1065 struct page **page_list = NULL;
1066 struct vduse_umem *umem = NULL;
1067 long pinned = 0;
1068 unsigned long npages, lock_limit;
1069 int ret;
1070
1071 if (!dev->domain || !dev->domain->bounce_map ||
1072 size != dev->domain->bounce_size ||
1073 iova != 0 || uaddr & ~PAGE_MASK)
1074 return -EINVAL;
1075
1076 mutex_lock(&dev->mem_lock);
1077 ret = -EEXIST;
1078 if (dev->umem)
1079 goto unlock;
1080
1081 ret = -ENOMEM;
1082 npages = size >> PAGE_SHIFT;
1083 page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1084 GFP_KERNEL_ACCOUNT);
1085 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1086 if (!page_list || !umem)
1087 goto unlock;
1088
1089 mmap_read_lock(current->mm);
1090
1091 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1092 if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1093 goto out;
1094
1095 pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1096 page_list);
1097 if (pinned != npages) {
1098 ret = pinned < 0 ? pinned : -ENOMEM;
1099 goto out;
1100 }
1101
1102 ret = vduse_domain_add_user_bounce_pages(dev->domain,
1103 page_list, pinned);
1104 if (ret)
1105 goto out;
1106
1107 atomic64_add(npages, &current->mm->pinned_vm);
1108
1109 umem->pages = page_list;
1110 umem->npages = pinned;
1111 umem->iova = iova;
1112 umem->mm = current->mm;
1113 mmgrab(current->mm);
1114
1115 dev->umem = umem;
1116 out:
1117 if (ret && pinned > 0)
1118 unpin_user_pages(page_list, pinned);
1119
1120 mmap_read_unlock(current->mm);
1121 unlock:
1122 if (ret) {
1123 vfree(page_list);
1124 kfree(umem);
1125 }
1126 mutex_unlock(&dev->mem_lock);
1127 return ret;
1128 }
1129
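/*
 * Pick the next online CPU from the virtqueue's affinity mask in a
 * round-robin fashion; if no CPU in the mask is online, fall back to
 * IRQ_UNBOUND so the interrupt work runs on the unbound workqueue.
 */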
1130 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1131 {
1132 int curr_cpu = vq->irq_effective_cpu;
1133
1134 while (true) {
1135 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1136 if (cpu_online(curr_cpu))
1137 break;
1138
1139 if (curr_cpu >= nr_cpu_ids)
1140 curr_cpu = IRQ_UNBOUND;
1141 }
1142
1143 vq->irq_effective_cpu = curr_cpu;
1144 }
1145
1146 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1147 unsigned long arg)
1148 {
1149 struct vduse_dev *dev = file->private_data;
1150 void __user *argp = (void __user *)arg;
1151 int ret;
1152
1153 if (unlikely(dev->broken))
1154 return -EPERM;
1155
1156 switch (cmd) {
1157 case VDUSE_IOTLB_GET_FD: {
1158 struct vduse_iotlb_entry entry;
1159 struct vhost_iotlb_map *map;
1160 struct vdpa_map_file *map_file;
1161 struct file *f = NULL;
1162
1163 ret = -EFAULT;
1164 if (copy_from_user(&entry, argp, sizeof(entry)))
1165 break;
1166
1167 ret = -EINVAL;
1168 if (entry.start > entry.last)
1169 break;
1170
1171 mutex_lock(&dev->domain_lock);
1172 if (!dev->domain) {
1173 mutex_unlock(&dev->domain_lock);
1174 break;
1175 }
1176 spin_lock(&dev->domain->iotlb_lock);
1177 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1178 entry.start, entry.last);
1179 if (map) {
1180 map_file = (struct vdpa_map_file *)map->opaque;
1181 f = get_file(map_file->file);
1182 entry.offset = map_file->offset;
1183 entry.start = map->start;
1184 entry.last = map->last;
1185 entry.perm = map->perm;
1186 }
1187 spin_unlock(&dev->domain->iotlb_lock);
1188 mutex_unlock(&dev->domain_lock);
1189 ret = -EINVAL;
1190 if (!f)
1191 break;
1192
1193 ret = -EFAULT;
1194 if (copy_to_user(argp, &entry, sizeof(entry))) {
1195 fput(f);
1196 break;
1197 }
1198 ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
1199 fput(f);
1200 break;
1201 }
1202 case VDUSE_DEV_GET_FEATURES:
1203 /*
1204 * Just mirror what the driver wrote here.
1205 * The driver is expected to check FEATURES_OK later.
1206 */
1207 ret = put_user(dev->driver_features, (u64 __user *)argp);
1208 break;
1209 case VDUSE_DEV_SET_CONFIG: {
1210 struct vduse_config_data config;
1211 unsigned long size = offsetof(struct vduse_config_data,
1212 buffer);
1213
1214 ret = -EFAULT;
1215 if (copy_from_user(&config, argp, size))
1216 break;
1217
1218 ret = -EINVAL;
1219 if (config.offset > dev->config_size ||
1220 config.length == 0 ||
1221 config.length > dev->config_size - config.offset)
1222 break;
1223
1224 ret = -EFAULT;
1225 if (copy_from_user(dev->config + config.offset, argp + size,
1226 config.length))
1227 break;
1228
1229 ret = 0;
1230 break;
1231 }
1232 case VDUSE_DEV_INJECT_CONFIG_IRQ:
1233 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1234 break;
1235 case VDUSE_VQ_SETUP: {
1236 struct vduse_vq_config config;
1237 u32 index;
1238
1239 ret = -EFAULT;
1240 if (copy_from_user(&config, argp, sizeof(config)))
1241 break;
1242
1243 ret = -EINVAL;
1244 if (config.index >= dev->vq_num)
1245 break;
1246
1247 if (!is_mem_zero((const char *)config.reserved,
1248 sizeof(config.reserved)))
1249 break;
1250
1251 index = array_index_nospec(config.index, dev->vq_num);
1252 dev->vqs[index]->num_max = config.max_size;
1253 ret = 0;
1254 break;
1255 }
1256 case VDUSE_VQ_GET_INFO: {
1257 struct vduse_vq_info vq_info;
1258 struct vduse_virtqueue *vq;
1259 u32 index;
1260
1261 ret = -EFAULT;
1262 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1263 break;
1264
1265 ret = -EINVAL;
1266 if (vq_info.index >= dev->vq_num)
1267 break;
1268
1269 index = array_index_nospec(vq_info.index, dev->vq_num);
1270 vq = dev->vqs[index];
1271 vq_info.desc_addr = vq->desc_addr;
1272 vq_info.driver_addr = vq->driver_addr;
1273 vq_info.device_addr = vq->device_addr;
1274 vq_info.num = vq->num;
1275
1276 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1277 vq_info.packed.last_avail_counter =
1278 vq->state.packed.last_avail_counter;
1279 vq_info.packed.last_avail_idx =
1280 vq->state.packed.last_avail_idx;
1281 vq_info.packed.last_used_counter =
1282 vq->state.packed.last_used_counter;
1283 vq_info.packed.last_used_idx =
1284 vq->state.packed.last_used_idx;
1285 } else
1286 vq_info.split.avail_index =
1287 vq->state.split.avail_index;
1288
1289 vq_info.ready = vq->ready;
1290
1291 ret = -EFAULT;
1292 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1293 break;
1294
1295 ret = 0;
1296 break;
1297 }
1298 case VDUSE_VQ_SETUP_KICKFD: {
1299 struct vduse_vq_eventfd eventfd;
1300
1301 ret = -EFAULT;
1302 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1303 break;
1304
1305 ret = vduse_kickfd_setup(dev, &eventfd);
1306 break;
1307 }
1308 case VDUSE_VQ_INJECT_IRQ: {
1309 u32 index;
1310
1311 ret = -EFAULT;
1312 if (get_user(index, (u32 __user *)argp))
1313 break;
1314
1315 ret = -EINVAL;
1316 if (index >= dev->vq_num)
1317 break;
1318
1319 ret = 0;
1320 index = array_index_nospec(index, dev->vq_num);
1321 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1322 vduse_vq_update_effective_cpu(dev->vqs[index]);
1323 ret = vduse_dev_queue_irq_work(dev,
1324 &dev->vqs[index]->inject,
1325 dev->vqs[index]->irq_effective_cpu);
1326 }
1327 break;
1328 }
1329 case VDUSE_IOTLB_REG_UMEM: {
1330 struct vduse_iova_umem umem;
1331
1332 ret = -EFAULT;
1333 if (copy_from_user(&umem, argp, sizeof(umem)))
1334 break;
1335
1336 ret = -EINVAL;
1337 if (!is_mem_zero((const char *)umem.reserved,
1338 sizeof(umem.reserved)))
1339 break;
1340
1341 mutex_lock(&dev->domain_lock);
1342 ret = vduse_dev_reg_umem(dev, umem.iova,
1343 umem.uaddr, umem.size);
1344 mutex_unlock(&dev->domain_lock);
1345 break;
1346 }
1347 case VDUSE_IOTLB_DEREG_UMEM: {
1348 struct vduse_iova_umem umem;
1349
1350 ret = -EFAULT;
1351 if (copy_from_user(&umem, argp, sizeof(umem)))
1352 break;
1353
1354 ret = -EINVAL;
1355 if (!is_mem_zero((const char *)umem.reserved,
1356 sizeof(umem.reserved)))
1357 break;
1358 mutex_lock(&dev->domain_lock);
1359 ret = vduse_dev_dereg_umem(dev, umem.iova,
1360 umem.size);
1361 mutex_unlock(&dev->domain_lock);
1362 break;
1363 }
1364 case VDUSE_IOTLB_GET_INFO: {
1365 struct vduse_iova_info info;
1366 struct vhost_iotlb_map *map;
1367
1368 ret = -EFAULT;
1369 if (copy_from_user(&info, argp, sizeof(info)))
1370 break;
1371
1372 ret = -EINVAL;
1373 if (info.start > info.last)
1374 break;
1375
1376 if (!is_mem_zero((const char *)info.reserved,
1377 sizeof(info.reserved)))
1378 break;
1379
1380 mutex_lock(&dev->domain_lock);
1381 if (!dev->domain) {
1382 mutex_unlock(&dev->domain_lock);
1383 break;
1384 }
1385 spin_lock(&dev->domain->iotlb_lock);
1386 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1387 info.start, info.last);
1388 if (map) {
1389 info.start = map->start;
1390 info.last = map->last;
1391 info.capability = 0;
1392 if (dev->domain->bounce_map && map->start == 0 &&
1393 map->last == dev->domain->bounce_size - 1)
1394 info.capability |= VDUSE_IOVA_CAP_UMEM;
1395 }
1396 spin_unlock(&dev->domain->iotlb_lock);
1397 mutex_unlock(&dev->domain_lock);
1398 if (!map)
1399 break;
1400
1401 ret = -EFAULT;
1402 if (copy_to_user(argp, &info, sizeof(info)))
1403 break;
1404
1405 ret = 0;
1406 break;
1407 }
1408 default:
1409 ret = -ENOIOCTLCMD;
1410 break;
1411 }
1412
1413 return ret;
1414 }
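/*
 * A rough sketch of how a daemon typically drives the ioctls above once
 * it has opened /dev/vduse/$DEVICE (illustrative only, error handling
 * omitted; kick_efd is assumed to be an eventfd created by the daemon):
 *
 *	struct vduse_vq_config vq_cfg = { .index = 0, .max_size = 256 };
 *	struct vduse_vq_eventfd vq_efd = { .index = 0, .fd = kick_efd };
 *	uint32_t vq_idx = 0;
 *
 *	ioctl(dev_fd, VDUSE_VQ_SETUP, &vq_cfg);
 *	ioctl(dev_fd, VDUSE_VQ_SETUP_KICKFD, &vq_efd);
 *	...
 *	ioctl(dev_fd, VDUSE_VQ_INJECT_IRQ, &vq_idx);
 *
 * VDUSE_VQ_SETUP must be called for every queue before the device can
 * be attached to a vDPA bus (see vduse_dev_is_ready()).
 */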
1415
1416 static int vduse_dev_release(struct inode *inode, struct file *file)
1417 {
1418 struct vduse_dev *dev = file->private_data;
1419
1420 mutex_lock(&dev->domain_lock);
1421 if (dev->domain)
1422 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1423 mutex_unlock(&dev->domain_lock);
1424 spin_lock(&dev->msg_lock);
1425 /* Make sure the inflight messages can be processed after reconnection */
1426 list_splice_init(&dev->recv_list, &dev->send_list);
1427 spin_unlock(&dev->msg_lock);
1428 dev->connected = false;
1429
1430 return 0;
1431 }
1432
1433 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1434 {
1435 struct vduse_dev *dev;
1436
1437 mutex_lock(&vduse_lock);
1438 dev = idr_find(&vduse_idr, minor);
1439 mutex_unlock(&vduse_lock);
1440
1441 return dev;
1442 }
1443
1444 static int vduse_dev_open(struct inode *inode, struct file *file)
1445 {
1446 int ret;
1447 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1448
1449 if (!dev)
1450 return -ENODEV;
1451
1452 ret = -EBUSY;
1453 mutex_lock(&dev->lock);
1454 if (dev->connected)
1455 goto unlock;
1456
1457 ret = 0;
1458 dev->connected = true;
1459 file->private_data = dev;
1460 unlock:
1461 mutex_unlock(&dev->lock);
1462
1463 return ret;
1464 }
1465
1466 static const struct file_operations vduse_dev_fops = {
1467 .owner = THIS_MODULE,
1468 .open = vduse_dev_open,
1469 .release = vduse_dev_release,
1470 .read_iter = vduse_dev_read_iter,
1471 .write_iter = vduse_dev_write_iter,
1472 .poll = vduse_dev_poll,
1473 .unlocked_ioctl = vduse_dev_ioctl,
1474 .compat_ioctl = compat_ptr_ioctl,
1475 .llseek = noop_llseek,
1476 };
1477
1478 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1479 {
1480 return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1481 }
1482
1483 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1484 const char *buf, size_t count)
1485 {
1486 cpumask_var_t new_value;
1487 int ret;
1488
1489 if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1490 return -ENOMEM;
1491
1492 ret = cpumask_parse(buf, new_value);
1493 if (ret)
1494 goto free_mask;
1495
1496 ret = -EINVAL;
1497 if (!cpumask_intersects(new_value, cpu_online_mask))
1498 goto free_mask;
1499
1500 cpumask_copy(&vq->irq_affinity, new_value);
1501 ret = count;
1502 free_mask:
1503 free_cpumask_var(new_value);
1504 return ret;
1505 }
1506
1507 struct vq_sysfs_entry {
1508 struct attribute attr;
1509 ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1510 ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1511 size_t count);
1512 };
1513
1514 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1515
1516 static struct attribute *vq_attrs[] = {
1517 &irq_cb_affinity_attr.attr,
1518 NULL,
1519 };
1520 ATTRIBUTE_GROUPS(vq);
1521
1522 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1523 char *buf)
1524 {
1525 struct vduse_virtqueue *vq = container_of(kobj,
1526 struct vduse_virtqueue, kobj);
1527 struct vq_sysfs_entry *entry = container_of(attr,
1528 struct vq_sysfs_entry, attr);
1529
1530 if (!entry->show)
1531 return -EIO;
1532
1533 return entry->show(vq, buf);
1534 }
1535
1536 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1537 const char *buf, size_t count)
1538 {
1539 struct vduse_virtqueue *vq = container_of(kobj,
1540 struct vduse_virtqueue, kobj);
1541 struct vq_sysfs_entry *entry = container_of(attr,
1542 struct vq_sysfs_entry, attr);
1543
1544 if (!entry->store)
1545 return -EIO;
1546
1547 return entry->store(vq, buf, count);
1548 }
1549
1550 static const struct sysfs_ops vq_sysfs_ops = {
1551 .show = vq_attr_show,
1552 .store = vq_attr_store,
1553 };
1554
1555 static void vq_release(struct kobject *kobj)
1556 {
1557 struct vduse_virtqueue *vq = container_of(kobj,
1558 struct vduse_virtqueue, kobj);
1559 kfree(vq);
1560 }
1561
1562 static const struct kobj_type vq_type = {
1563 .release = vq_release,
1564 .sysfs_ops = &vq_sysfs_ops,
1565 .default_groups = vq_groups,
1566 };
1567
1568 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1569 {
1570 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1571 }
1572
1573 static const struct class vduse_class = {
1574 .name = "vduse",
1575 .devnode = vduse_devnode,
1576 };
1577
1578 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1579 {
1580 int i;
1581
1582 if (!dev->vqs)
1583 return;
1584
1585 for (i = 0; i < dev->vq_num; i++)
1586 kobject_put(&dev->vqs[i]->kobj);
1587 kfree(dev->vqs);
1588 }
1589
1590 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1591 {
1592 int ret, i;
1593
1594 dev->vq_align = vq_align;
1595 dev->vq_num = vq_num;
1596 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1597 if (!dev->vqs)
1598 return -ENOMEM;
1599
1600 for (i = 0; i < vq_num; i++) {
1601 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1602 if (!dev->vqs[i]) {
1603 ret = -ENOMEM;
1604 goto err;
1605 }
1606
1607 dev->vqs[i]->index = i;
1608 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1609 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1610 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1611 spin_lock_init(&dev->vqs[i]->kick_lock);
1612 spin_lock_init(&dev->vqs[i]->irq_lock);
1613 cpumask_setall(&dev->vqs[i]->irq_affinity);
1614
1615 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1616 ret = kobject_add(&dev->vqs[i]->kobj,
1617 &dev->dev->kobj, "vq%d", i);
1618 if (ret) {
1619 kfree(dev->vqs[i]);
1620 goto err;
1621 }
1622 }
1623
1624 return 0;
1625 err:
1626 while (i--)
1627 kobject_put(&dev->vqs[i]->kobj);
1628 kfree(dev->vqs);
1629 dev->vqs = NULL;
1630 return ret;
1631 }
1632
1633 static struct vduse_dev *vduse_dev_create(void)
1634 {
1635 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1636
1637 if (!dev)
1638 return NULL;
1639
1640 mutex_init(&dev->lock);
1641 mutex_init(&dev->mem_lock);
1642 mutex_init(&dev->domain_lock);
1643 spin_lock_init(&dev->msg_lock);
1644 INIT_LIST_HEAD(&dev->send_list);
1645 INIT_LIST_HEAD(&dev->recv_list);
1646 spin_lock_init(&dev->irq_lock);
1647 init_rwsem(&dev->rwsem);
1648
1649 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1650 init_waitqueue_head(&dev->waitq);
1651
1652 return dev;
1653 }
1654
1655 static void vduse_dev_destroy(struct vduse_dev *dev)
1656 {
1657 kfree(dev);
1658 }
1659
1660 static struct vduse_dev *vduse_find_dev(const char *name)
1661 {
1662 struct vduse_dev *dev;
1663 int id;
1664
1665 idr_for_each_entry(&vduse_idr, dev, id)
1666 if (!strcmp(dev->name, name))
1667 return dev;
1668
1669 return NULL;
1670 }
1671
1672 static int vduse_destroy_dev(char *name)
1673 {
1674 struct vduse_dev *dev = vduse_find_dev(name);
1675
1676 if (!dev)
1677 return -EINVAL;
1678
1679 mutex_lock(&dev->lock);
1680 if (dev->vdev || dev->connected) {
1681 mutex_unlock(&dev->lock);
1682 return -EBUSY;
1683 }
1684 dev->connected = true;
1685 mutex_unlock(&dev->lock);
1686
1687 vduse_dev_reset(dev);
1688 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1689 idr_remove(&vduse_idr, dev->minor);
1690 kvfree(dev->config);
1691 vduse_dev_deinit_vqs(dev);
1692 if (dev->domain)
1693 vduse_domain_destroy(dev->domain);
1694 kfree(dev->name);
1695 vduse_dev_destroy(dev);
1696 module_put(THIS_MODULE);
1697
1698 return 0;
1699 }
1700
1701 static bool device_is_allowed(u32 device_id)
1702 {
1703 int i;
1704
1705 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1706 if (allowed_device_id[i] == device_id)
1707 return true;
1708
1709 return false;
1710 }
1711
1712 static bool features_is_valid(struct vduse_dev_config *config)
1713 {
1714 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1715 return false;
1716
1717 /* Now we only support read-only configuration space */
1718 if ((config->device_id == VIRTIO_ID_BLOCK) &&
1719 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1720 return false;
1721 else if ((config->device_id == VIRTIO_ID_NET) &&
1722 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1723 return false;
1724
1725 if ((config->device_id == VIRTIO_ID_NET) &&
1726 !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1727 return false;
1728
1729 return true;
1730 }
1731
1732 static bool vduse_validate_config(struct vduse_dev_config *config)
1733 {
1734 if (!is_mem_zero((const char *)config->reserved,
1735 sizeof(config->reserved)))
1736 return false;
1737
1738 if (config->vq_align > PAGE_SIZE)
1739 return false;
1740
1741 if (config->config_size > PAGE_SIZE)
1742 return false;
1743
1744 if (config->vq_num > 0xffff)
1745 return false;
1746
1747 if (!config->name[0])
1748 return false;
1749
1750 if (!device_is_allowed(config->device_id))
1751 return false;
1752
1753 if (!features_is_valid(config))
1754 return false;
1755
1756 return true;
1757 }
1758
1759 static ssize_t msg_timeout_show(struct device *device,
1760 struct device_attribute *attr, char *buf)
1761 {
1762 struct vduse_dev *dev = dev_get_drvdata(device);
1763
1764 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1765 }
1766
1767 static ssize_t msg_timeout_store(struct device *device,
1768 struct device_attribute *attr,
1769 const char *buf, size_t count)
1770 {
1771 struct vduse_dev *dev = dev_get_drvdata(device);
1772 int ret;
1773
1774 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1775 if (ret < 0)
1776 return ret;
1777
1778 return count;
1779 }
1780
1781 static DEVICE_ATTR_RW(msg_timeout);
1782
1783 static ssize_t bounce_size_show(struct device *device,
1784 struct device_attribute *attr, char *buf)
1785 {
1786 struct vduse_dev *dev = dev_get_drvdata(device);
1787
1788 return sysfs_emit(buf, "%u\n", dev->bounce_size);
1789 }
1790
1791 static ssize_t bounce_size_store(struct device *device,
1792 struct device_attribute *attr,
1793 const char *buf, size_t count)
1794 {
1795 struct vduse_dev *dev = dev_get_drvdata(device);
1796 unsigned int bounce_size;
1797 int ret;
1798
1799 ret = -EPERM;
1800 mutex_lock(&dev->domain_lock);
1801 if (dev->domain)
1802 goto unlock;
1803
1804 ret = kstrtouint(buf, 10, &bounce_size);
1805 if (ret < 0)
1806 goto unlock;
1807
1808 ret = -EINVAL;
1809 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1810 bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1811 goto unlock;
1812
1813 dev->bounce_size = bounce_size & PAGE_MASK;
1814 ret = count;
1815 unlock:
1816 mutex_unlock(&dev->domain_lock);
1817 return ret;
1818 }
1819
1820 static DEVICE_ATTR_RW(bounce_size);
1821
1822 static struct attribute *vduse_dev_attrs[] = {
1823 &dev_attr_msg_timeout.attr,
1824 &dev_attr_bounce_size.attr,
1825 NULL
1826 };
1827
1828 ATTRIBUTE_GROUPS(vduse_dev);
1829
1830 static int vduse_create_dev(struct vduse_dev_config *config,
1831 void *config_buf, u64 api_version)
1832 {
1833 int ret;
1834 struct vduse_dev *dev;
1835
1836 ret = -EPERM;
1837 if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
1838 goto err;
1839
1840 ret = -EEXIST;
1841 if (vduse_find_dev(config->name))
1842 goto err;
1843
1844 ret = -ENOMEM;
1845 dev = vduse_dev_create();
1846 if (!dev)
1847 goto err;
1848
1849 dev->api_version = api_version;
1850 dev->device_features = config->features;
1851 dev->device_id = config->device_id;
1852 dev->vendor_id = config->vendor_id;
1853 dev->name = kstrdup(config->name, GFP_KERNEL);
1854 if (!dev->name)
1855 goto err_str;
1856
1857 dev->bounce_size = VDUSE_BOUNCE_SIZE;
1858 dev->config = config_buf;
1859 dev->config_size = config->config_size;
1860
1861 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1862 if (ret < 0)
1863 goto err_idr;
1864
1865 dev->minor = ret;
1866 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1867 dev->dev = device_create_with_groups(&vduse_class, NULL,
1868 MKDEV(MAJOR(vduse_major), dev->minor),
1869 dev, vduse_dev_groups, "%s", config->name);
1870 if (IS_ERR(dev->dev)) {
1871 ret = PTR_ERR(dev->dev);
1872 goto err_dev;
1873 }
1874
1875 ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1876 if (ret)
1877 goto err_vqs;
1878
1879 __module_get(THIS_MODULE);
1880
1881 return 0;
1882 err_vqs:
1883 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1884 err_dev:
1885 idr_remove(&vduse_idr, dev->minor);
1886 err_idr:
1887 kfree(dev->name);
1888 err_str:
1889 vduse_dev_destroy(dev);
1890 err:
1891 return ret;
1892 }
1893
1894 static long vduse_ioctl(struct file *file, unsigned int cmd,
1895 unsigned long arg)
1896 {
1897 int ret;
1898 void __user *argp = (void __user *)arg;
1899 struct vduse_control *control = file->private_data;
1900
1901 mutex_lock(&vduse_lock);
1902 switch (cmd) {
1903 case VDUSE_GET_API_VERSION:
1904 ret = put_user(control->api_version, (u64 __user *)argp);
1905 break;
1906 case VDUSE_SET_API_VERSION: {
1907 u64 api_version;
1908
1909 ret = -EFAULT;
1910 if (get_user(api_version, (u64 __user *)argp))
1911 break;
1912
1913 ret = -EINVAL;
1914 if (api_version > VDUSE_API_VERSION)
1915 break;
1916
1917 ret = 0;
1918 control->api_version = api_version;
1919 break;
1920 }
1921 case VDUSE_CREATE_DEV: {
1922 struct vduse_dev_config config;
1923 unsigned long size = offsetof(struct vduse_dev_config, config);
1924 void *buf;
1925
1926 ret = -EFAULT;
1927 if (copy_from_user(&config, argp, size))
1928 break;
1929
1930 ret = -EINVAL;
1931 if (vduse_validate_config(&config) == false)
1932 break;
1933
1934 buf = vmemdup_user(argp + size, config.config_size);
1935 if (IS_ERR(buf)) {
1936 ret = PTR_ERR(buf);
1937 break;
1938 }
1939 config.name[VDUSE_NAME_MAX - 1] = '\0';
1940 ret = vduse_create_dev(&config, buf, control->api_version);
1941 if (ret)
1942 kvfree(buf);
1943 break;
1944 }
1945 case VDUSE_DESTROY_DEV: {
1946 char name[VDUSE_NAME_MAX];
1947
1948 ret = -EFAULT;
1949 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1950 break;
1951
1952 name[VDUSE_NAME_MAX - 1] = '\0';
1953 ret = vduse_destroy_dev(name);
1954 break;
1955 }
1956 default:
1957 ret = -EINVAL;
1958 break;
1959 }
1960 mutex_unlock(&vduse_lock);
1961
1962 return ret;
1963 }
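/*
 * Device creation from userspace, roughly (illustrative sketch only;
 * config_buf, config_size and features are placeholders supplied by the
 * daemon, and features must include VIRTIO_F_ACCESS_PLATFORM, see
 * features_is_valid()):
 *
 *	struct vduse_dev_config *cfg;
 *
 *	cfg = calloc(1, sizeof(*cfg) + config_size);
 *	snprintf(cfg->name, VDUSE_NAME_MAX, "my-vduse-dev");
 *	cfg->device_id = VIRTIO_ID_BLOCK;
 *	cfg->vq_num = 1;
 *	cfg->vq_align = 4096;
 *	cfg->features = features;
 *	cfg->config_size = config_size;
 *	memcpy(cfg->config, config_buf, config_size);
 *
 *	ctrl_fd = open("/dev/vduse/control", O_RDWR);
 *	ioctl(ctrl_fd, VDUSE_CREATE_DEV, cfg);
 */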
1964
1965 static int vduse_release(struct inode *inode, struct file *file)
1966 {
1967 struct vduse_control *control = file->private_data;
1968
1969 kfree(control);
1970 return 0;
1971 }
1972
1973 static int vduse_open(struct inode *inode, struct file *file)
1974 {
1975 struct vduse_control *control;
1976
1977 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1978 if (!control)
1979 return -ENOMEM;
1980
1981 control->api_version = VDUSE_API_VERSION;
1982 file->private_data = control;
1983
1984 return 0;
1985 }
1986
1987 static const struct file_operations vduse_ctrl_fops = {
1988 .owner = THIS_MODULE,
1989 .open = vduse_open,
1990 .release = vduse_release,
1991 .unlocked_ioctl = vduse_ioctl,
1992 .compat_ioctl = compat_ptr_ioctl,
1993 .llseek = noop_llseek,
1994 };
1995
1996 struct vduse_mgmt_dev {
1997 struct vdpa_mgmt_dev mgmt_dev;
1998 struct device dev;
1999 };
2000
2001 static struct vduse_mgmt_dev *vduse_mgmt;
2002
2003 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
2004 {
2005 struct vduse_vdpa *vdev;
2006 int ret;
2007
2008 if (dev->vdev)
2009 return -EEXIST;
2010
2011 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
2012 &vduse_vdpa_config_ops, 1, 1, name, true);
2013 if (IS_ERR(vdev))
2014 return PTR_ERR(vdev);
2015
2016 dev->vdev = vdev;
2017 vdev->dev = dev;
2018 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
2019 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
2020 if (ret) {
2021 put_device(&vdev->vdpa.dev);
2022 return ret;
2023 }
2024 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
2025 vdev->vdpa.dma_dev = &vdev->vdpa.dev;
2026 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
2027
2028 return 0;
2029 }
2030
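/*
 * dev_add is reached through the vdpa management API once the daemon
 * has fully configured the device (see vduse_dev_is_ready()).  With the
 * iproute2 vdpa tool this is typically something like
 * "vdpa dev add name <name> mgmtdev vduse", though the exact command is
 * outside the scope of this driver.
 */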
2031 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
2032 const struct vdpa_dev_set_config *config)
2033 {
2034 struct vduse_dev *dev;
2035 int ret;
2036
2037 mutex_lock(&vduse_lock);
2038 dev = vduse_find_dev(name);
2039 if (!dev || !vduse_dev_is_ready(dev)) {
2040 mutex_unlock(&vduse_lock);
2041 return -EINVAL;
2042 }
2043 ret = vduse_dev_init_vdpa(dev, name);
2044 mutex_unlock(&vduse_lock);
2045 if (ret)
2046 return ret;
2047
2048 mutex_lock(&dev->domain_lock);
2049 if (!dev->domain)
2050 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2051 dev->bounce_size);
2052 mutex_unlock(&dev->domain_lock);
2053 if (!dev->domain) {
2054 put_device(&dev->vdev->vdpa.dev);
2055 return -ENOMEM;
2056 }
2057
2058 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2059 if (ret) {
2060 put_device(&dev->vdev->vdpa.dev);
2061 mutex_lock(&dev->domain_lock);
2062 vduse_domain_destroy(dev->domain);
2063 dev->domain = NULL;
2064 mutex_unlock(&dev->domain_lock);
2065 return ret;
2066 }
2067
2068 return 0;
2069 }
2070
2071 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2072 {
2073 _vdpa_unregister_device(dev);
2074 }
2075
2076 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2077 .dev_add = vdpa_dev_add,
2078 .dev_del = vdpa_dev_del,
2079 };
2080
2081 static struct virtio_device_id id_table[] = {
2082 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2083 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2084 { 0 },
2085 };
2086
2087 static void vduse_mgmtdev_release(struct device *dev)
2088 {
2089 struct vduse_mgmt_dev *mgmt_dev;
2090
2091 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2092 kfree(mgmt_dev);
2093 }
2094
2095 static int vduse_mgmtdev_init(void)
2096 {
2097 int ret;
2098
2099 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2100 if (!vduse_mgmt)
2101 return -ENOMEM;
2102
2103 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2104 if (ret) {
2105 kfree(vduse_mgmt);
2106 return ret;
2107 }
2108
2109 vduse_mgmt->dev.release = vduse_mgmtdev_release;
2110
2111 ret = device_register(&vduse_mgmt->dev);
2112 if (ret)
2113 goto dev_reg_err;
2114
2115 vduse_mgmt->mgmt_dev.id_table = id_table;
2116 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2117 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2118 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2119 if (ret)
2120 device_unregister(&vduse_mgmt->dev);
2121
2122 return ret;
2123
2124 dev_reg_err:
2125 put_device(&vduse_mgmt->dev);
2126 return ret;
2127 }
2128
2129 static void vduse_mgmtdev_exit(void)
2130 {
2131 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2132 device_unregister(&vduse_mgmt->dev);
2133 }
2134
2135 static int vduse_init(void)
2136 {
2137 int ret;
2138 struct device *dev;
2139
2140 ret = class_register(&vduse_class);
2141 if (ret)
2142 return ret;
2143
2144 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2145 if (ret)
2146 goto err_chardev_region;
2147
2148 /* /dev/vduse/control */
2149 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2150 vduse_ctrl_cdev.owner = THIS_MODULE;
2151 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2152 if (ret)
2153 goto err_ctrl_cdev;
2154
2155 dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
2156 if (IS_ERR(dev)) {
2157 ret = PTR_ERR(dev);
2158 goto err_device;
2159 }
2160
2161 /* /dev/vduse/$DEVICE */
2162 cdev_init(&vduse_cdev, &vduse_dev_fops);
2163 vduse_cdev.owner = THIS_MODULE;
2164 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2165 VDUSE_DEV_MAX - 1);
2166 if (ret)
2167 goto err_cdev;
2168
2169 ret = -ENOMEM;
2170 vduse_irq_wq = alloc_workqueue("vduse-irq",
2171 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2172 if (!vduse_irq_wq)
2173 goto err_wq;
2174
2175 vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
2176 if (!vduse_irq_bound_wq)
2177 goto err_bound_wq;
2178
2179 ret = vduse_domain_init();
2180 if (ret)
2181 goto err_domain;
2182
2183 ret = vduse_mgmtdev_init();
2184 if (ret)
2185 goto err_mgmtdev;
2186
2187 return 0;
2188 err_mgmtdev:
2189 vduse_domain_exit();
2190 err_domain:
2191 destroy_workqueue(vduse_irq_bound_wq);
2192 err_bound_wq:
2193 destroy_workqueue(vduse_irq_wq);
2194 err_wq:
2195 cdev_del(&vduse_cdev);
2196 err_cdev:
2197 device_destroy(&vduse_class, vduse_major);
2198 err_device:
2199 cdev_del(&vduse_ctrl_cdev);
2200 err_ctrl_cdev:
2201 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2202 err_chardev_region:
2203 class_unregister(&vduse_class);
2204 return ret;
2205 }
2206 module_init(vduse_init);
2207
2208 static void vduse_exit(void)
2209 {
2210 vduse_mgmtdev_exit();
2211 vduse_domain_exit();
2212 destroy_workqueue(vduse_irq_bound_wq);
2213 destroy_workqueue(vduse_irq_wq);
2214 cdev_del(&vduse_cdev);
2215 device_destroy(&vduse_class, vduse_major);
2216 cdev_del(&vduse_ctrl_cdev);
2217 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2218 class_unregister(&vduse_class);
2219 }
2220 module_exit(vduse_exit);
2221
2222 MODULE_LICENSE(DRV_LICENSE);
2223 MODULE_AUTHOR(DRV_AUTHOR);
2224 MODULE_DESCRIPTION(DRV_DESC);
2225