// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include "linux/virtio_net.h"
#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_ring.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

#define IRQ_UNBOUND -1

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
	int irq_effective_cpu;
	struct cpumask irq_affinity;
	struct kobject kobj;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue **vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
	unsigned int bounce_size;
	struct mutex domain_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
static struct workqueue_struct *vduse_irq_bound_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
	VIRTIO_ID_NET,
	VIRTIO_ID_FS,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}
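
/*
 * Control-message plumbing between the kernel and the userspace server:
 * requests are queued on ->send_list, handed to userspace via read(), moved
 * to ->recv_list while a reply is outstanding, and completed when userspace
 * writes back a response with a matching request_id.
 */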

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when a timeout occurs */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}
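
/*
 * Virtqueue state lives in the userspace server, so reading it back needs a
 * synchronous VDUSE_GET_VQ_STATE round trip.  For packed rings the reply is
 * masked down to the 15-bit ring index and the 1-bit wrap counter.
 */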

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}
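
/*
 * Undo everything the virtio driver may have configured: per-vq addresses,
 * callbacks and kick eventfds are cleared under the vq locks, and dev->rwsem
 * is held for writing so no new interrupt work can be queued meanwhile.
 */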

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain && domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		vq->cb.trigger = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}
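
/*
 * vdpa config ops.  Most of these only touch state cached in the kernel; the
 * userspace server is consulted only where it is authoritative (set_status,
 * reset, reading back vq state and IOTLB updates), via vduse_dev_msg_sync().
 */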

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	vq->cb.trigger = cb->trigger;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->num = num;
}

static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (vq->num)
		return vq->num;
	else
		return vq->num_max;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i]->num_max)
			num_max = dev->vqs[i]->num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (cpu_mask)
		cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
	else
		cpumask_setall(&dev->vqs[idx]->irq_affinity);

	return 0;
}

static const struct cpumask *
vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return &dev->vqs[idx]->irq_affinity;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.get_vq_size = vduse_vdpa_get_vq_size,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.set_vq_affinity = vduse_vdpa_set_vq_affinity,
	.get_vq_affinity = vduse_vdpa_get_vq_affinity,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};
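
/*
 * DMA ops installed on the vDPA device: all mappings are redirected into the
 * per-device IOVA domain, which bounces data through kernel (or userspace
 * registered) pages that the VDUSE server can safely access.
 */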

static void vduse_dev_sync_single_for_device(struct device *dev,
					     dma_addr_t dma_addr, size_t size,
					     enum dma_data_direction dir)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
}

static void vduse_dev_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dma_addr, size_t size,
					  enum dma_data_direction dir)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
}

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
					   (dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.sync_single_for_device = vduse_dev_sync_single_for_device,
	.sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i]->num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_bh(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_bh(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_bh(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_bh(&vq->irq_lock);
}

static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
{
	bool signal = false;

	if (!vq->cb.trigger)
		return false;

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.trigger) {
		eventfd_signal(vq->cb.trigger);
		signal = true;
	}
	spin_unlock_irq(&vq->irq_lock);

	return signal;
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work,
				    int irq_effective_cpu)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	if (irq_effective_cpu == IRQ_UNBOUND)
		queue_work(vduse_irq_wq, irq_work);
	else
		queue_work_on(irq_effective_cpu,
			      vduse_irq_bound_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (!dev->domain)
		goto unlock;

	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}
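
/*
 * Register userspace memory to back the bounce buffer.  The pages are pinned
 * with FOLL_LONGTERM, charged against RLIMIT_MEMLOCK and handed to the IOVA
 * domain; vduse_dev_dereg_umem() above undoes all of this.
 */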

static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain || !dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
{
	int curr_cpu = vq->irq_effective_cpu;

	while (true) {
		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
		if (cpu_online(curr_cpu))
			break;

		if (curr_cpu >= nr_cpu_ids)
			curr_cpu = IRQ_UNBOUND;
	}

	vq->irq_effective_cpu = curr_cpu;
}
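
/*
 * ioctl interface of a /dev/vduse/$NAME device node: IOTLB fd/info lookup,
 * umem (de)registration, config space updates, per-vq setup and the two
 * interrupt injection paths (config IRQ and per-vq IRQ).
 */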

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		ret = 0;
		index = array_index_nospec(index, dev->vq_num);
		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
			vduse_vq_update_effective_cpu(dev->vqs[index]);
			ret = vduse_dev_queue_irq_work(dev,
						&dev->vqs[index]->inject,
						dev->vqs[index]->irq_effective_cpu);
		}
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;
		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (dev->domain->bounce_map && map->start == 0 &&
			    map->last == dev->domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	mutex_unlock(&dev->domain_lock);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};
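
/*
 * Per-virtqueue sysfs attribute (vq$N/irq_cb_affinity) that shows and updates
 * the CPU mask used to pick where the interrupt callback work is queued.
 */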

static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
{
	return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
}

static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
				     const char *buf, size_t count)
{
	cpumask_var_t new_value;
	int ret;

	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	ret = cpumask_parse(buf, new_value);
	if (ret)
		goto free_mask;

	ret = -EINVAL;
	if (!cpumask_intersects(new_value, cpu_online_mask))
		goto free_mask;

	cpumask_copy(&vq->irq_affinity, new_value);
	ret = count;
free_mask:
	free_cpumask_var(new_value);
	return ret;
}

struct vq_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
			 size_t count);
};

static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);

static struct attribute *vq_attrs[] = {
	&irq_cb_affinity_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vq);

static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
			    char *buf)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->show)
		return -EIO;

	return entry->show(vq, buf);
}

static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
			     const char *buf, size_t count)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->store)
		return -EIO;

	return entry->store(vq, buf, count);
}

static const struct sysfs_ops vq_sysfs_ops = {
	.show = vq_attr_show,
	.store = vq_attr_store,
};

static void vq_release(struct kobject *kobj)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	kfree(vq);
}

static const struct kobj_type vq_type = {
	.release = vq_release,
	.sysfs_ops = &vq_sysfs_ops,
	.default_groups = vq_groups,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static const struct class vduse_class = {
	.name = "vduse",
	.devnode = vduse_devnode,
};

static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{
	int i;

	if (!dev->vqs)
		return;

	for (i = 0; i < dev->vq_num; i++)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
}

static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int ret, i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
		if (!dev->vqs[i]) {
			ret = -ENOMEM;
			goto err;
		}

		dev->vqs[i]->index = i;
		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i]->kick_lock);
		spin_lock_init(&dev->vqs[i]->irq_lock);
		cpumask_setall(&dev->vqs[i]->irq_affinity);

		kobject_init(&dev->vqs[i]->kobj, &vq_type);
		ret = kobject_add(&dev->vqs[i]->kobj,
				  &dev->dev->kobj, "vq%d", i);
		if (ret) {
			kfree(dev->vqs[i]);
			goto err;
		}
	}

	return 0;
err:
	while (i--)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return ret;
}

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	mutex_init(&dev->domain_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	vduse_dev_deinit_vqs(dev);
	if (dev->domain)
		vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}
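
/*
 * Creation-time validation of a vduse_dev_config: reserved fields must be
 * zero, vq_align/config_size/vq_num are bounded, the device type must be on
 * the allow list above and the advertised feature bits must be acceptable.
 */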

static bool features_is_valid(struct vduse_dev_config *config)
{
	if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if ((config->device_id == VIRTIO_ID_BLOCK) &&
	    (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
		return false;
	else if ((config->device_id == VIRTIO_ID_NET) &&
		 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return false;

	if ((config->device_id == VIRTIO_ID_NET) &&
	    !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!config->name[0])
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static ssize_t bounce_size_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->bounce_size);
}

static ssize_t bounce_size_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	unsigned int bounce_size;
	int ret;

	ret = -EPERM;
	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		goto unlock;

	ret = kstrtouint(buf, 10, &bounce_size);
	if (ret < 0)
		goto unlock;

	ret = -EINVAL;
	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
		goto unlock;

	dev->bounce_size = bounce_size & PAGE_MASK;
	ret = count;
unlock:
	mutex_unlock(&dev->domain_lock);
	return ret;
}

static DEVICE_ATTR_RW(bounce_size);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EPERM;
	if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
		goto err;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(&vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	__module_get(THIS_MODULE);

	return 0;
err_vqs:
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}
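
/*
 * ioctls on /dev/vduse/control: query or set the API version and create or
 * destroy named VDUSE devices, all serialized by vduse_lock.
 */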

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}
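
/*
 * vdpa management device callbacks: "vdpa dev add" binds a ready VDUSE
 * device to the vdpa bus, creating its IOVA domain on first use with the
 * currently configured bounce_size.
 */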

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	if (!dev->domain)
		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
						  dev->bounce_size);
	mutex_unlock(&dev->domain_lock);
	if (!dev->domain) {
		put_device(&dev->vdev->vdpa.dev);
		return -ENOMEM;
	}

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		mutex_lock(&dev->domain_lock);
		vduse_domain_destroy(dev->domain);
		dev->domain = NULL;
		mutex_unlock(&dev->domain_lock);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
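
/*
 * Module init/exit: minor 0 of the "vduse" char region is /dev/vduse/control,
 * the remaining minors back the per-device nodes; two workqueues service
 * interrupt injection (unbound by default, CPU-bound when affinity is set).
 */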

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);