1 /* 2 * vhost-vdpa 3 * 4 * Copyright(c) 2017-2018 Intel Corporation. 5 * Copyright(c) 2020 Red Hat, Inc. 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or later. 8 * See the COPYING file in the top-level directory. 9 * 10 */ 11 12 #include "qemu/osdep.h" 13 #include <linux/vhost.h> 14 #include <linux/vfio.h> 15 #include <sys/eventfd.h> 16 #include <sys/ioctl.h> 17 #include "hw/virtio/vhost.h" 18 #include "hw/virtio/vhost-backend.h" 19 #include "hw/virtio/virtio-net.h" 20 #include "hw/virtio/vhost-shadow-virtqueue.h" 21 #include "hw/virtio/vhost-vdpa.h" 22 #include "exec/address-spaces.h" 23 #include "migration/blocker.h" 24 #include "qemu/cutils.h" 25 #include "qemu/main-loop.h" 26 #include "cpu.h" 27 #include "trace.h" 28 #include "qapi/error.h" 29 30 /* 31 * Return one past the end of the end of section. Be careful with uint64_t 32 * conversions! 33 */ 34 static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) 35 { 36 Int128 llend = int128_make64(section->offset_within_address_space); 37 llend = int128_add(llend, section->size); 38 llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 39 40 return llend; 41 } 42 43 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, 44 uint64_t iova_min, 45 uint64_t iova_max) 46 { 47 Int128 llend; 48 49 if ((!memory_region_is_ram(section->mr) && 50 !memory_region_is_iommu(section->mr)) || 51 memory_region_is_protected(section->mr) || 52 /* vhost-vDPA doesn't allow MMIO to be mapped */ 53 memory_region_is_ram_device(section->mr)) { 54 return true; 55 } 56 57 if (section->offset_within_address_space < iova_min) { 58 error_report("RAM section out of device range (min=0x%" PRIx64 59 ", addr=0x%" HWADDR_PRIx ")", 60 iova_min, section->offset_within_address_space); 61 return true; 62 } 63 64 llend = vhost_vdpa_section_end(section); 65 if (int128_gt(llend, int128_make64(iova_max))) { 66 error_report("RAM section out of device range (max=0x%" PRIx64 67 ", end addr=0x%" PRIx64 ")", 68 iova_max, int128_get64(llend)); 69 return true; 70 } 71 72 return false; 73 } 74 75 /* 76 * The caller must set asid = 0 if the device does not support asid. 77 * This is not an ABI break since it is set to 0 by the initializer anyway. 78 */ 79 int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, 80 hwaddr size, void *vaddr, bool readonly) 81 { 82 struct vhost_msg_v2 msg = {}; 83 int fd = v->device_fd; 84 int ret = 0; 85 86 msg.type = v->msg_type; 87 msg.asid = asid; 88 msg.iotlb.iova = iova; 89 msg.iotlb.size = size; 90 msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; 91 msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; 92 msg.iotlb.type = VHOST_IOTLB_UPDATE; 93 94 trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, 95 msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, 96 msg.iotlb.type); 97 98 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 99 error_report("failed to write, fd=%d, errno=%d (%s)", 100 fd, errno, strerror(errno)); 101 return -EIO ; 102 } 103 104 return ret; 105 } 106 107 /* 108 * The caller must set asid = 0 if the device does not support asid. 109 * This is not an ABI break since it is set to 0 by the initializer anyway. 110 */ 111 int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, 112 hwaddr size) 113 { 114 struct vhost_msg_v2 msg = {}; 115 int fd = v->device_fd; 116 int ret = 0; 117 118 msg.type = v->msg_type; 119 msg.asid = asid; 120 msg.iotlb.iova = iova; 121 msg.iotlb.size = size; 122 msg.iotlb.type = VHOST_IOTLB_INVALIDATE; 123 124 trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, 125 msg.iotlb.size, msg.iotlb.type); 126 127 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 128 error_report("failed to write, fd=%d, errno=%d (%s)", 129 fd, errno, strerror(errno)); 130 return -EIO ; 131 } 132 133 return ret; 134 } 135 136 static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) 137 { 138 int fd = v->device_fd; 139 struct vhost_msg_v2 msg = { 140 .type = v->msg_type, 141 .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, 142 }; 143 144 trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type); 145 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 146 error_report("failed to write, fd=%d, errno=%d (%s)", 147 fd, errno, strerror(errno)); 148 } 149 } 150 151 static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) 152 { 153 if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && 154 !v->iotlb_batch_begin_sent) { 155 vhost_vdpa_listener_begin_batch(v); 156 } 157 158 v->iotlb_batch_begin_sent = true; 159 } 160 161 static void vhost_vdpa_listener_commit(MemoryListener *listener) 162 { 163 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 164 struct vhost_dev *dev = v->dev; 165 struct vhost_msg_v2 msg = {}; 166 int fd = v->device_fd; 167 168 if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { 169 return; 170 } 171 172 if (!v->iotlb_batch_begin_sent) { 173 return; 174 } 175 176 msg.type = v->msg_type; 177 msg.iotlb.type = VHOST_IOTLB_BATCH_END; 178 179 trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type); 180 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 181 error_report("failed to write, fd=%d, errno=%d (%s)", 182 fd, errno, strerror(errno)); 183 } 184 185 v->iotlb_batch_begin_sent = false; 186 } 187 188 static void vhost_vdpa_listener_region_add(MemoryListener *listener, 189 MemoryRegionSection *section) 190 { 191 DMAMap mem_region = {}; 192 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 193 hwaddr iova; 194 Int128 llend, llsize; 195 void *vaddr; 196 int ret; 197 198 if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, 199 v->iova_range.last)) { 200 return; 201 } 202 203 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 204 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 205 error_report("%s received unaligned region", __func__); 206 return; 207 } 208 209 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 210 llend = vhost_vdpa_section_end(section); 211 if (int128_ge(int128_make64(iova), llend)) { 212 return; 213 } 214 215 memory_region_ref(section->mr); 216 217 /* Here we assume that memory_region_is_ram(section->mr)==true */ 218 219 vaddr = memory_region_get_ram_ptr(section->mr) + 220 section->offset_within_region + 221 (iova - section->offset_within_address_space); 222 223 trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), 224 vaddr, section->readonly); 225 226 llsize = int128_sub(llend, int128_make64(iova)); 227 if (v->shadow_data) { 228 int r; 229 230 mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, 231 mem_region.size = int128_get64(llsize) - 1, 232 mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), 233 234 r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); 235 if (unlikely(r != IOVA_OK)) { 236 error_report("Can't allocate a mapping (%d)", r); 237 goto fail; 238 } 239 240 iova = mem_region.iova; 241 } 242 243 vhost_vdpa_iotlb_batch_begin_once(v); 244 ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, 245 int128_get64(llsize), vaddr, section->readonly); 246 if (ret) { 247 error_report("vhost vdpa map fail!"); 248 goto fail_map; 249 } 250 251 return; 252 253 fail_map: 254 if (v->shadow_data) { 255 vhost_iova_tree_remove(v->iova_tree, mem_region); 256 } 257 258 fail: 259 /* 260 * On the initfn path, store the first error in the container so we 261 * can gracefully fail. Runtime, there's not much we can do other 262 * than throw a hardware error. 263 */ 264 error_report("vhost-vdpa: DMA mapping failed, unable to continue"); 265 return; 266 267 } 268 269 static void vhost_vdpa_listener_region_del(MemoryListener *listener, 270 MemoryRegionSection *section) 271 { 272 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 273 hwaddr iova; 274 Int128 llend, llsize; 275 int ret; 276 277 if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, 278 v->iova_range.last)) { 279 return; 280 } 281 282 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 283 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 284 error_report("%s received unaligned region", __func__); 285 return; 286 } 287 288 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 289 llend = vhost_vdpa_section_end(section); 290 291 trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); 292 293 if (int128_ge(int128_make64(iova), llend)) { 294 return; 295 } 296 297 llsize = int128_sub(llend, int128_make64(iova)); 298 299 if (v->shadow_data) { 300 const DMAMap *result; 301 const void *vaddr = memory_region_get_ram_ptr(section->mr) + 302 section->offset_within_region + 303 (iova - section->offset_within_address_space); 304 DMAMap mem_region = { 305 .translated_addr = (hwaddr)(uintptr_t)vaddr, 306 .size = int128_get64(llsize) - 1, 307 }; 308 309 result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); 310 if (!result) { 311 /* The memory listener map wasn't mapped */ 312 return; 313 } 314 iova = result->iova; 315 vhost_iova_tree_remove(v->iova_tree, *result); 316 } 317 vhost_vdpa_iotlb_batch_begin_once(v); 318 ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, 319 int128_get64(llsize)); 320 if (ret) { 321 error_report("vhost_vdpa dma unmap error!"); 322 } 323 324 memory_region_unref(section->mr); 325 } 326 /* 327 * IOTLB API is used by vhost-vdpa which requires incremental updating 328 * of the mapping. So we can not use generic vhost memory listener which 329 * depends on the addnop(). 330 */ 331 static const MemoryListener vhost_vdpa_memory_listener = { 332 .name = "vhost-vdpa", 333 .commit = vhost_vdpa_listener_commit, 334 .region_add = vhost_vdpa_listener_region_add, 335 .region_del = vhost_vdpa_listener_region_del, 336 }; 337 338 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, 339 void *arg) 340 { 341 struct vhost_vdpa *v = dev->opaque; 342 int fd = v->device_fd; 343 int ret; 344 345 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 346 347 ret = ioctl(fd, request, arg); 348 return ret < 0 ? -errno : ret; 349 } 350 351 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) 352 { 353 uint8_t s; 354 int ret; 355 356 trace_vhost_vdpa_add_status(dev, status); 357 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s); 358 if (ret < 0) { 359 return ret; 360 } 361 362 s |= status; 363 364 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); 365 if (ret < 0) { 366 return ret; 367 } 368 369 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s); 370 if (ret < 0) { 371 return ret; 372 } 373 374 if (!(s & status)) { 375 return -EIO; 376 } 377 378 return 0; 379 } 380 381 int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range) 382 { 383 int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); 384 385 return ret < 0 ? -errno : 0; 386 } 387 388 /* 389 * The use of this function is for requests that only need to be 390 * applied once. Typically such request occurs at the beginning 391 * of operation, and before setting up queues. It should not be 392 * used for request that performs operation until all queues are 393 * set, which would need to check dev->vq_index_end instead. 394 */ 395 static bool vhost_vdpa_first_dev(struct vhost_dev *dev) 396 { 397 struct vhost_vdpa *v = dev->opaque; 398 399 return v->index == 0; 400 } 401 402 static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, 403 uint64_t *features) 404 { 405 int ret; 406 407 ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); 408 trace_vhost_vdpa_get_features(dev, *features); 409 return ret; 410 } 411 412 static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) 413 { 414 g_autoptr(GPtrArray) shadow_vqs = NULL; 415 416 shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); 417 for (unsigned n = 0; n < hdev->nvqs; ++n) { 418 VhostShadowVirtqueue *svq; 419 420 svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque); 421 g_ptr_array_add(shadow_vqs, svq); 422 } 423 424 v->shadow_vqs = g_steal_pointer(&shadow_vqs); 425 } 426 427 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) 428 { 429 struct vhost_vdpa *v; 430 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 431 trace_vhost_vdpa_init(dev, opaque); 432 int ret; 433 434 /* 435 * Similar to VFIO, we end up pinning all guest memory and have to 436 * disable discarding of RAM. 437 */ 438 ret = ram_block_discard_disable(true); 439 if (ret) { 440 error_report("Cannot set discarding of RAM broken"); 441 return ret; 442 } 443 444 v = opaque; 445 v->dev = dev; 446 dev->opaque = opaque ; 447 v->listener = vhost_vdpa_memory_listener; 448 v->msg_type = VHOST_IOTLB_MSG_V2; 449 vhost_vdpa_init_svq(dev, v); 450 451 if (!vhost_vdpa_first_dev(dev)) { 452 return 0; 453 } 454 455 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 456 VIRTIO_CONFIG_S_DRIVER); 457 458 return 0; 459 } 460 461 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, 462 int queue_index) 463 { 464 size_t page_size = qemu_real_host_page_size(); 465 struct vhost_vdpa *v = dev->opaque; 466 VirtIODevice *vdev = dev->vdev; 467 VhostVDPAHostNotifier *n; 468 469 n = &v->notifier[queue_index]; 470 471 if (n->addr) { 472 virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false); 473 object_unparent(OBJECT(&n->mr)); 474 munmap(n->addr, page_size); 475 n->addr = NULL; 476 } 477 } 478 479 static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) 480 { 481 size_t page_size = qemu_real_host_page_size(); 482 struct vhost_vdpa *v = dev->opaque; 483 VirtIODevice *vdev = dev->vdev; 484 VhostVDPAHostNotifier *n; 485 int fd = v->device_fd; 486 void *addr; 487 char *name; 488 489 vhost_vdpa_host_notifier_uninit(dev, queue_index); 490 491 n = &v->notifier[queue_index]; 492 493 addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 494 queue_index * page_size); 495 if (addr == MAP_FAILED) { 496 goto err; 497 } 498 499 name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]", 500 v, queue_index); 501 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 502 page_size, addr); 503 g_free(name); 504 505 if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { 506 object_unparent(OBJECT(&n->mr)); 507 munmap(addr, page_size); 508 goto err; 509 } 510 n->addr = addr; 511 512 return 0; 513 514 err: 515 return -1; 516 } 517 518 static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) 519 { 520 int i; 521 522 for (i = dev->vq_index; i < dev->vq_index + n; i++) { 523 vhost_vdpa_host_notifier_uninit(dev, i); 524 } 525 } 526 527 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) 528 { 529 struct vhost_vdpa *v = dev->opaque; 530 int i; 531 532 if (v->shadow_vqs_enabled) { 533 /* FIXME SVQ is not compatible with host notifiers mr */ 534 return; 535 } 536 537 for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { 538 if (vhost_vdpa_host_notifier_init(dev, i)) { 539 goto err; 540 } 541 } 542 543 return; 544 545 err: 546 vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index); 547 return; 548 } 549 550 static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) 551 { 552 struct vhost_vdpa *v = dev->opaque; 553 size_t idx; 554 555 for (idx = 0; idx < v->shadow_vqs->len; ++idx) { 556 vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); 557 } 558 g_ptr_array_free(v->shadow_vqs, true); 559 } 560 561 static int vhost_vdpa_cleanup(struct vhost_dev *dev) 562 { 563 struct vhost_vdpa *v; 564 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 565 v = dev->opaque; 566 trace_vhost_vdpa_cleanup(dev, v); 567 vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); 568 memory_listener_unregister(&v->listener); 569 vhost_vdpa_svq_cleanup(dev); 570 571 dev->opaque = NULL; 572 ram_block_discard_disable(false); 573 574 return 0; 575 } 576 577 static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) 578 { 579 trace_vhost_vdpa_memslots_limit(dev, INT_MAX); 580 return INT_MAX; 581 } 582 583 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, 584 struct vhost_memory *mem) 585 { 586 if (!vhost_vdpa_first_dev(dev)) { 587 return 0; 588 } 589 590 trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); 591 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && 592 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { 593 int i; 594 for (i = 0; i < mem->nregions; i++) { 595 trace_vhost_vdpa_dump_regions(dev, i, 596 mem->regions[i].guest_phys_addr, 597 mem->regions[i].memory_size, 598 mem->regions[i].userspace_addr, 599 mem->regions[i].flags_padding); 600 } 601 } 602 if (mem->padding) { 603 return -EINVAL; 604 } 605 606 return 0; 607 } 608 609 static int vhost_vdpa_set_features(struct vhost_dev *dev, 610 uint64_t features) 611 { 612 struct vhost_vdpa *v = dev->opaque; 613 int ret; 614 615 if (!vhost_vdpa_first_dev(dev)) { 616 return 0; 617 } 618 619 if (v->shadow_vqs_enabled) { 620 if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) { 621 /* 622 * QEMU is just trying to enable or disable logging. SVQ handles 623 * this sepparately, so no need to forward this. 624 */ 625 v->acked_features = features; 626 return 0; 627 } 628 629 v->acked_features = features; 630 631 /* We must not ack _F_LOG if SVQ is enabled */ 632 features &= ~BIT_ULL(VHOST_F_LOG_ALL); 633 } 634 635 trace_vhost_vdpa_set_features(dev, features); 636 ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); 637 if (ret) { 638 return ret; 639 } 640 641 return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 642 } 643 644 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) 645 { 646 uint64_t features; 647 uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 648 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | 649 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; 650 int r; 651 652 if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { 653 return -EFAULT; 654 } 655 656 features &= f; 657 658 if (vhost_vdpa_first_dev(dev)) { 659 r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); 660 if (r) { 661 return -EFAULT; 662 } 663 } 664 665 dev->backend_cap = features; 666 667 return 0; 668 } 669 670 static int vhost_vdpa_get_device_id(struct vhost_dev *dev, 671 uint32_t *device_id) 672 { 673 int ret; 674 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); 675 trace_vhost_vdpa_get_device_id(dev, *device_id); 676 return ret; 677 } 678 679 static void vhost_vdpa_reset_svq(struct vhost_vdpa *v) 680 { 681 if (!v->shadow_vqs_enabled) { 682 return; 683 } 684 685 for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { 686 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); 687 vhost_svq_stop(svq); 688 } 689 } 690 691 static int vhost_vdpa_reset_device(struct vhost_dev *dev) 692 { 693 struct vhost_vdpa *v = dev->opaque; 694 int ret; 695 uint8_t status = 0; 696 697 vhost_vdpa_reset_svq(v); 698 699 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); 700 trace_vhost_vdpa_reset_device(dev, status); 701 return ret; 702 } 703 704 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) 705 { 706 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 707 708 trace_vhost_vdpa_get_vq_index(dev, idx, idx); 709 return idx; 710 } 711 712 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) 713 { 714 int i; 715 trace_vhost_vdpa_set_vring_ready(dev); 716 for (i = 0; i < dev->nvqs; ++i) { 717 struct vhost_vring_state state = { 718 .index = dev->vq_index + i, 719 .num = 1, 720 }; 721 vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); 722 } 723 return 0; 724 } 725 726 static int vhost_vdpa_set_config_call(struct vhost_dev *dev, 727 int fd) 728 { 729 trace_vhost_vdpa_set_config_call(dev, fd); 730 return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd); 731 } 732 733 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, 734 uint32_t config_len) 735 { 736 int b, len; 737 char line[QEMU_HEXDUMP_LINE_LEN]; 738 739 for (b = 0; b < config_len; b += 16) { 740 len = config_len - b; 741 qemu_hexdump_line(line, b, config, len, false); 742 trace_vhost_vdpa_dump_config(dev, line); 743 } 744 } 745 746 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, 747 uint32_t offset, uint32_t size, 748 uint32_t flags) 749 { 750 struct vhost_vdpa_config *config; 751 int ret; 752 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 753 754 trace_vhost_vdpa_set_config(dev, offset, size, flags); 755 config = g_malloc(size + config_size); 756 config->off = offset; 757 config->len = size; 758 memcpy(config->buf, data, size); 759 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) && 760 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 761 vhost_vdpa_dump_config(dev, data, size); 762 } 763 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config); 764 g_free(config); 765 return ret; 766 } 767 768 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, 769 uint32_t config_len, Error **errp) 770 { 771 struct vhost_vdpa_config *v_config; 772 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 773 int ret; 774 775 trace_vhost_vdpa_get_config(dev, config, config_len); 776 v_config = g_malloc(config_len + config_size); 777 v_config->len = config_len; 778 v_config->off = 0; 779 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config); 780 memcpy(config, v_config->buf, config_len); 781 g_free(v_config); 782 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) && 783 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 784 vhost_vdpa_dump_config(dev, config, config_len); 785 } 786 return ret; 787 } 788 789 static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev, 790 struct vhost_vring_state *ring) 791 { 792 trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); 793 return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); 794 } 795 796 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev, 797 struct vhost_vring_file *file) 798 { 799 trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); 800 return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); 801 } 802 803 static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev, 804 struct vhost_vring_file *file) 805 { 806 trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); 807 return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); 808 } 809 810 static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev, 811 struct vhost_vring_addr *addr) 812 { 813 trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, 814 addr->desc_user_addr, addr->used_user_addr, 815 addr->avail_user_addr, 816 addr->log_guest_addr); 817 818 return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); 819 820 } 821 822 /** 823 * Set the shadow virtqueue descriptors to the device 824 * 825 * @dev: The vhost device model 826 * @svq: The shadow virtqueue 827 * @idx: The index of the virtqueue in the vhost device 828 * @errp: Error 829 * 830 * Note that this function does not rewind kick file descriptor if cannot set 831 * call one. 832 */ 833 static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, 834 VhostShadowVirtqueue *svq, unsigned idx, 835 Error **errp) 836 { 837 struct vhost_vring_file file = { 838 .index = dev->vq_index + idx, 839 }; 840 const EventNotifier *event_notifier = &svq->hdev_kick; 841 int r; 842 843 r = event_notifier_init(&svq->hdev_kick, 0); 844 if (r != 0) { 845 error_setg_errno(errp, -r, "Couldn't create kick event notifier"); 846 goto err_init_hdev_kick; 847 } 848 849 r = event_notifier_init(&svq->hdev_call, 0); 850 if (r != 0) { 851 error_setg_errno(errp, -r, "Couldn't create call event notifier"); 852 goto err_init_hdev_call; 853 } 854 855 file.fd = event_notifier_get_fd(event_notifier); 856 r = vhost_vdpa_set_vring_dev_kick(dev, &file); 857 if (unlikely(r != 0)) { 858 error_setg_errno(errp, -r, "Can't set device kick fd"); 859 goto err_init_set_dev_fd; 860 } 861 862 event_notifier = &svq->hdev_call; 863 file.fd = event_notifier_get_fd(event_notifier); 864 r = vhost_vdpa_set_vring_dev_call(dev, &file); 865 if (unlikely(r != 0)) { 866 error_setg_errno(errp, -r, "Can't set device call fd"); 867 goto err_init_set_dev_fd; 868 } 869 870 return 0; 871 872 err_init_set_dev_fd: 873 event_notifier_set_handler(&svq->hdev_call, NULL); 874 875 err_init_hdev_call: 876 event_notifier_cleanup(&svq->hdev_kick); 877 878 err_init_hdev_kick: 879 return r; 880 } 881 882 /** 883 * Unmap a SVQ area in the device 884 */ 885 static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) 886 { 887 const DMAMap needle = { 888 .translated_addr = addr, 889 }; 890 const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); 891 hwaddr size; 892 int r; 893 894 if (unlikely(!result)) { 895 error_report("Unable to find SVQ address to unmap"); 896 return; 897 } 898 899 size = ROUND_UP(result->size, qemu_real_host_page_size()); 900 r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); 901 if (unlikely(r < 0)) { 902 error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); 903 return; 904 } 905 906 vhost_iova_tree_remove(v->iova_tree, *result); 907 } 908 909 static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, 910 const VhostShadowVirtqueue *svq) 911 { 912 struct vhost_vdpa *v = dev->opaque; 913 struct vhost_vring_addr svq_addr; 914 915 vhost_svq_get_vring_addr(svq, &svq_addr); 916 917 vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); 918 919 vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); 920 } 921 922 /** 923 * Map the SVQ area in the device 924 * 925 * @v: Vhost-vdpa device 926 * @needle: The area to search iova 927 * @errorp: Error pointer 928 */ 929 static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, 930 Error **errp) 931 { 932 int r; 933 934 r = vhost_iova_tree_map_alloc(v->iova_tree, needle); 935 if (unlikely(r != IOVA_OK)) { 936 error_setg(errp, "Cannot allocate iova (%d)", r); 937 return false; 938 } 939 940 r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, 941 needle->size + 1, 942 (void *)(uintptr_t)needle->translated_addr, 943 needle->perm == IOMMU_RO); 944 if (unlikely(r != 0)) { 945 error_setg_errno(errp, -r, "Cannot map region to device"); 946 vhost_iova_tree_remove(v->iova_tree, *needle); 947 } 948 949 return r == 0; 950 } 951 952 /** 953 * Map the shadow virtqueue rings in the device 954 * 955 * @dev: The vhost device 956 * @svq: The shadow virtqueue 957 * @addr: Assigned IOVA addresses 958 * @errp: Error pointer 959 */ 960 static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, 961 const VhostShadowVirtqueue *svq, 962 struct vhost_vring_addr *addr, 963 Error **errp) 964 { 965 ERRP_GUARD(); 966 DMAMap device_region, driver_region; 967 struct vhost_vring_addr svq_addr; 968 struct vhost_vdpa *v = dev->opaque; 969 size_t device_size = vhost_svq_device_area_size(svq); 970 size_t driver_size = vhost_svq_driver_area_size(svq); 971 size_t avail_offset; 972 bool ok; 973 974 vhost_svq_get_vring_addr(svq, &svq_addr); 975 976 driver_region = (DMAMap) { 977 .translated_addr = svq_addr.desc_user_addr, 978 .size = driver_size - 1, 979 .perm = IOMMU_RO, 980 }; 981 ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); 982 if (unlikely(!ok)) { 983 error_prepend(errp, "Cannot create vq driver region: "); 984 return false; 985 } 986 addr->desc_user_addr = driver_region.iova; 987 avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; 988 addr->avail_user_addr = driver_region.iova + avail_offset; 989 990 device_region = (DMAMap) { 991 .translated_addr = svq_addr.used_user_addr, 992 .size = device_size - 1, 993 .perm = IOMMU_RW, 994 }; 995 ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); 996 if (unlikely(!ok)) { 997 error_prepend(errp, "Cannot create vq device region: "); 998 vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); 999 } 1000 addr->used_user_addr = device_region.iova; 1001 1002 return ok; 1003 } 1004 1005 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, 1006 VhostShadowVirtqueue *svq, unsigned idx, 1007 Error **errp) 1008 { 1009 uint16_t vq_index = dev->vq_index + idx; 1010 struct vhost_vring_state s = { 1011 .index = vq_index, 1012 }; 1013 int r; 1014 1015 r = vhost_vdpa_set_dev_vring_base(dev, &s); 1016 if (unlikely(r)) { 1017 error_setg_errno(errp, -r, "Cannot set vring base"); 1018 return false; 1019 } 1020 1021 r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp); 1022 return r == 0; 1023 } 1024 1025 static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) 1026 { 1027 struct vhost_vdpa *v = dev->opaque; 1028 Error *err = NULL; 1029 unsigned i; 1030 1031 if (!v->shadow_vqs_enabled) { 1032 return true; 1033 } 1034 1035 for (i = 0; i < v->shadow_vqs->len; ++i) { 1036 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); 1037 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); 1038 struct vhost_vring_addr addr = { 1039 .index = dev->vq_index + i, 1040 }; 1041 int r; 1042 bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); 1043 if (unlikely(!ok)) { 1044 goto err; 1045 } 1046 1047 vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); 1048 ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); 1049 if (unlikely(!ok)) { 1050 goto err_map; 1051 } 1052 1053 /* Override vring GPA set by vhost subsystem */ 1054 r = vhost_vdpa_set_vring_dev_addr(dev, &addr); 1055 if (unlikely(r != 0)) { 1056 error_setg_errno(&err, -r, "Cannot set device address"); 1057 goto err_set_addr; 1058 } 1059 } 1060 1061 return true; 1062 1063 err_set_addr: 1064 vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i)); 1065 1066 err_map: 1067 vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i)); 1068 1069 err: 1070 error_reportf_err(err, "Cannot setup SVQ %u: ", i); 1071 for (unsigned j = 0; j < i; ++j) { 1072 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j); 1073 vhost_vdpa_svq_unmap_rings(dev, svq); 1074 vhost_svq_stop(svq); 1075 } 1076 1077 return false; 1078 } 1079 1080 static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) 1081 { 1082 struct vhost_vdpa *v = dev->opaque; 1083 1084 if (!v->shadow_vqs_enabled) { 1085 return; 1086 } 1087 1088 for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { 1089 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); 1090 vhost_vdpa_svq_unmap_rings(dev, svq); 1091 1092 event_notifier_cleanup(&svq->hdev_kick); 1093 event_notifier_cleanup(&svq->hdev_call); 1094 } 1095 } 1096 1097 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) 1098 { 1099 struct vhost_vdpa *v = dev->opaque; 1100 bool ok; 1101 trace_vhost_vdpa_dev_start(dev, started); 1102 1103 if (started) { 1104 vhost_vdpa_host_notifiers_init(dev); 1105 ok = vhost_vdpa_svqs_start(dev); 1106 if (unlikely(!ok)) { 1107 return -1; 1108 } 1109 vhost_vdpa_set_vring_ready(dev); 1110 } else { 1111 vhost_vdpa_svqs_stop(dev); 1112 vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); 1113 } 1114 1115 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 1116 return 0; 1117 } 1118 1119 if (started) { 1120 memory_listener_register(&v->listener, &address_space_memory); 1121 return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 1122 } else { 1123 vhost_vdpa_reset_device(dev); 1124 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 1125 VIRTIO_CONFIG_S_DRIVER); 1126 memory_listener_unregister(&v->listener); 1127 1128 return 0; 1129 } 1130 } 1131 1132 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, 1133 struct vhost_log *log) 1134 { 1135 struct vhost_vdpa *v = dev->opaque; 1136 if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { 1137 return 0; 1138 } 1139 1140 trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, 1141 log->log); 1142 return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); 1143 } 1144 1145 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, 1146 struct vhost_vring_addr *addr) 1147 { 1148 struct vhost_vdpa *v = dev->opaque; 1149 1150 if (v->shadow_vqs_enabled) { 1151 /* 1152 * Device vring addr was set at device start. SVQ base is handled by 1153 * VirtQueue code. 1154 */ 1155 return 0; 1156 } 1157 1158 return vhost_vdpa_set_vring_dev_addr(dev, addr); 1159 } 1160 1161 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, 1162 struct vhost_vring_state *ring) 1163 { 1164 trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num); 1165 return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); 1166 } 1167 1168 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, 1169 struct vhost_vring_state *ring) 1170 { 1171 struct vhost_vdpa *v = dev->opaque; 1172 VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); 1173 1174 /* 1175 * vhost-vdpa devices does not support in-flight requests. Set all of them 1176 * as available. 1177 * 1178 * TODO: This is ok for networking, but other kinds of devices might 1179 * have problems with these retransmissions. 1180 */ 1181 while (virtqueue_rewind(vq, 1)) { 1182 continue; 1183 } 1184 if (v->shadow_vqs_enabled) { 1185 /* 1186 * Device vring base was set at device start. SVQ base is handled by 1187 * VirtQueue code. 1188 */ 1189 return 0; 1190 } 1191 1192 return vhost_vdpa_set_dev_vring_base(dev, ring); 1193 } 1194 1195 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, 1196 struct vhost_vring_state *ring) 1197 { 1198 struct vhost_vdpa *v = dev->opaque; 1199 int ret; 1200 1201 if (v->shadow_vqs_enabled) { 1202 ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index); 1203 return 0; 1204 } 1205 1206 ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); 1207 trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); 1208 return ret; 1209 } 1210 1211 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, 1212 struct vhost_vring_file *file) 1213 { 1214 struct vhost_vdpa *v = dev->opaque; 1215 int vdpa_idx = file->index - dev->vq_index; 1216 1217 if (v->shadow_vqs_enabled) { 1218 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); 1219 vhost_svq_set_svq_kick_fd(svq, file->fd); 1220 return 0; 1221 } else { 1222 return vhost_vdpa_set_vring_dev_kick(dev, file); 1223 } 1224 } 1225 1226 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, 1227 struct vhost_vring_file *file) 1228 { 1229 struct vhost_vdpa *v = dev->opaque; 1230 1231 if (v->shadow_vqs_enabled) { 1232 int vdpa_idx = file->index - dev->vq_index; 1233 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); 1234 1235 vhost_svq_set_svq_call_fd(svq, file->fd); 1236 return 0; 1237 } else { 1238 return vhost_vdpa_set_vring_dev_call(dev, file); 1239 } 1240 } 1241 1242 static int vhost_vdpa_get_features(struct vhost_dev *dev, 1243 uint64_t *features) 1244 { 1245 struct vhost_vdpa *v = dev->opaque; 1246 int ret = vhost_vdpa_get_dev_features(dev, features); 1247 1248 if (ret == 0 && v->shadow_vqs_enabled) { 1249 /* Add SVQ logging capabilities */ 1250 *features |= BIT_ULL(VHOST_F_LOG_ALL); 1251 } 1252 1253 return ret; 1254 } 1255 1256 static int vhost_vdpa_set_owner(struct vhost_dev *dev) 1257 { 1258 if (!vhost_vdpa_first_dev(dev)) { 1259 return 0; 1260 } 1261 1262 trace_vhost_vdpa_set_owner(dev); 1263 return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); 1264 } 1265 1266 static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, 1267 struct vhost_vring_addr *addr, struct vhost_virtqueue *vq) 1268 { 1269 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 1270 addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys; 1271 addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys; 1272 addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys; 1273 trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr, 1274 addr->avail_user_addr, addr->used_user_addr); 1275 return 0; 1276 } 1277 1278 static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) 1279 { 1280 return true; 1281 } 1282 1283 const VhostOps vdpa_ops = { 1284 .backend_type = VHOST_BACKEND_TYPE_VDPA, 1285 .vhost_backend_init = vhost_vdpa_init, 1286 .vhost_backend_cleanup = vhost_vdpa_cleanup, 1287 .vhost_set_log_base = vhost_vdpa_set_log_base, 1288 .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, 1289 .vhost_set_vring_num = vhost_vdpa_set_vring_num, 1290 .vhost_set_vring_base = vhost_vdpa_set_vring_base, 1291 .vhost_get_vring_base = vhost_vdpa_get_vring_base, 1292 .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, 1293 .vhost_set_vring_call = vhost_vdpa_set_vring_call, 1294 .vhost_get_features = vhost_vdpa_get_features, 1295 .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, 1296 .vhost_set_owner = vhost_vdpa_set_owner, 1297 .vhost_set_vring_endian = NULL, 1298 .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, 1299 .vhost_set_mem_table = vhost_vdpa_set_mem_table, 1300 .vhost_set_features = vhost_vdpa_set_features, 1301 .vhost_reset_device = vhost_vdpa_reset_device, 1302 .vhost_get_vq_index = vhost_vdpa_get_vq_index, 1303 .vhost_get_config = vhost_vdpa_get_config, 1304 .vhost_set_config = vhost_vdpa_set_config, 1305 .vhost_requires_shm_log = NULL, 1306 .vhost_migration_done = NULL, 1307 .vhost_backend_can_merge = NULL, 1308 .vhost_net_set_mtu = NULL, 1309 .vhost_set_iotlb_callback = NULL, 1310 .vhost_send_device_iotlb_msg = NULL, 1311 .vhost_dev_start = vhost_vdpa_dev_start, 1312 .vhost_get_device_id = vhost_vdpa_get_device_id, 1313 .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, 1314 .vhost_force_iommu = vhost_vdpa_force_iommu, 1315 .vhost_set_config_call = vhost_vdpa_set_config_call, 1316 }; 1317