/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
#include "qemu-common.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    llend = vhost_vdpa_section_end(section);
    if (int128_gt(llend, int128_make64(iova_max))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     iova_max, int128_get64(llend));
        return true;
    }

    return false;
}

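/*
 * Map [iova, iova + size) in the device IOVA space to the host virtual
 * address range starting at vaddr, by writing a VHOST_IOTLB_UPDATE
 * message to the vhost-vdpa device fd. A short write is reported as -EIO.
 */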
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
{
    int fd = v->device_fd;
    struct vhost_msg_v2 msg = {
        .type = v->msg_type,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!v->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    v->iotlb_batch_begin_sent = false;
}

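/*
 * MemoryListener hook: map a new RAM section into the device IOVA space.
 * When shadow virtqueues are enabled, the IOVA is allocated from the IOVA
 * tree instead of reusing the section's address-space offset.
 */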
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (v->shadow_vqs_enabled) {
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
        };

        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail;
    }

    return;

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. At runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (v->shadow_vqs_enabled) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
                            section->offset_within_region +
                            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
        iova = result->iova;
        vhost_iova_tree_remove(v->iova_tree, &mem_region);
    }
    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    memory_region_unref(section->mr);
}
/*
 * IOTLB API is used by vhost-vdpa which requires incremental updating
 * of the mapping. So we cannot use the generic vhost memory listener which
 * depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

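/*
 * Thin wrapper around ioctl() on the vhost-vdpa device fd; a negative
 * return is converted into -errno so callers get a usable error code.
 */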
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}

static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
{
    int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
                              &v->iova_range);
    if (ret != 0) {
        v->iova_range.first = 0;
        v->iova_range.last = UINT64_MAX;
    }

    trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
                                    v->iova_range.last);
}

static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index != 0;
}

static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                               Error **errp)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;
    uint64_t dev_features, svq_features;
    int r;
    bool ok;

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
    if (r != 0) {
        error_setg_errno(errp, -r, "Can't get vdpa device features");
        return r;
    }

    svq_features = dev_features;
    ok = vhost_svq_valid_features(svq_features, errp);
    if (unlikely(!ok)) {
        return -1;
    }

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);

        if (unlikely(!svq)) {
            error_setg(errp, "Cannot create svq %u", n);
            return -1;
        }
        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
    return 0;
}

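/*
 * Backend init: disable RAM discarding (all guest memory may end up
 * pinned), set up the memory listener and optional shadow virtqueues,
 * query the usable IOVA range, and set the ACKNOWLEDGE and DRIVER
 * status bits (first vhost_dev of the device only).
 */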
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);
    int ret;

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    ret = vhost_vdpa_init_svq(dev, v, errp);
    if (ret) {
        goto err;
    }

    vhost_vdpa_get_iova_range(v);

    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;

err:
    ram_block_discard_disable(false);
    return ret;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size;
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size;
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            goto err;
        }
    }

    return;

err:
    vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
    return;
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    if (!v->shadow_vqs) {
        return;
    }

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

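/*
 * Backend teardown: mirrors vhost_vdpa_init() by removing the host
 * notifiers, the memory listener and the shadow virtqueues, and by
 * re-enabling RAM discarding.
 */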
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;
    ram_block_discard_disable(false);

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    int ret;

    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_one_time_request(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
{
    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        vhost_svq_stop(svq);
    }
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    vhost_vdpa_reset_svq(v);

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

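/*
 * Enable (num = 1) every vring owned by this vhost device via
 * VHOST_VDPA_SET_VRING_ENABLE.
 */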
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;
    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

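/*
 * The *_dev_* helpers below always reach the vDPA device itself. The
 * corresponding vhost ops further down forward to them only when shadow
 * virtqueues are disabled; otherwise the SVQ intercepts the call.
 */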
static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

/**
 * Set the shadow virtqueue kick and call file descriptors in the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        return r;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
    }

    return r;
}

/**
 * Unmap a SVQ area in the device
 */
static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
                                      const DMAMap *needle)
{
    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return false;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size);
    r = vhost_vdpa_dma_unmap(v, result->iova, size);
    return r == 0;
}

static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    DMAMap needle = {};
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    needle.translated_addr = svq_addr.desc_user_addr;
    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
    if (unlikely(!ok)) {
        return false;
    }

    needle.translated_addr = svq_addr.used_user_addr;
    return vhost_vdpa_svq_unmap_ring(v, &needle);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search an IOVA for
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->iova_tree, needle);
    }

    return r == 0;
}

/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    ERRP_GUARD();
    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, &driver_region);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}

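/*
 * Program one shadow virtqueue into the device: reset the device vring
 * base and point the device's kick/call file descriptors at the SVQ's
 * own notifiers.
 */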
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs) {
        return true;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
        if (unlikely(!ok)) {
            return false;
        }
    }

    return true;
}

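/*
 * Start/stop hook.  Host notifiers, shadow virtqueues and vring enable
 * are handled per vhost_dev, while the memory listener and the DRIVER_OK
 * status bit are only touched once the last virtqueue of the device has
 * been reached (dev->vq_index + dev->nvqs == dev->vq_index_end).
 */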
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        ok = vhost_vdpa_svqs_stop(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        int vdpa_idx = file->index - dev->vq_index;
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

        vhost_svq_set_svq_call_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_call(dev, file);
    }
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

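/*
 * Report the vrings' guest physical addresses (vq->*_phys) in the
 * *_user_addr fields: the device translates these through the IOTLB
 * mappings installed by the memory listener, so host virtual addresses
 * are not used here.
 */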
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};