/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
#include "qemu-common.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    llend = vhost_vdpa_section_end(section);
    if (int128_gt(llend, int128_make64(iova_max))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     iova_max, int128_get64(llend));
        return true;
    }

    return false;
}

static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}
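/*
 * Tear down the [iova, iova + size) mapping in the device IOTLB. As with
 * vhost_vdpa_dma_map(), this is a single struct vhost_msg_v2 written to the
 * vhost-vDPA device fd, here with type VHOST_IOTLB_INVALIDATE.
 */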
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
{
    int fd = v->device_fd;
    struct vhost_msg_v2 msg = {
        .type = v->msg_type,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!v->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    v->iotlb_batch_begin_sent = false;
}
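/*
 * Memory listener hook: a new RAM section became visible in the guest
 * address space. Page-align it, skip it if it falls outside the device's
 * usable IOVA range, and map it into the device with its host virtual
 * address so the kernel can pin and translate the pages.
 */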
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost-vdpa: DMA map failed!");
        goto fail;
    }

    return;

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can fail gracefully. At runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost-vdpa: DMA unmap failed!");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping, so we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}
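/*
 * Set additional bits in the device status byte. The status is read back
 * after the update so that a device that rejects the new bits (for example
 * refusing FEATURES_OK) is reported as -EIO rather than silently ignored.
 */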
static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
{
    int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
                              &v->iova_range);
    if (ret != 0) {
        v->iova_range.first = 0;
        v->iova_range.last = UINT64_MAX;
    }

    trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
                                    v->iova_range.last);
}

static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index != 0;
}

static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                               Error **errp)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;
    uint64_t dev_features, svq_features;
    int r;
    bool ok;

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
    if (r != 0) {
        error_setg_errno(errp, -r, "Can't get vdpa device features");
        return r;
    }

    svq_features = dev_features;
    ok = vhost_svq_valid_features(svq_features, errp);
    if (unlikely(!ok)) {
        return -1;
    }

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();

        if (unlikely(!svq)) {
            error_setg(errp, "Cannot create svq %u", n);
            return -1;
        }
        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
    return 0;
}
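/*
 * Backend init: stash the vhost_vdpa state in the vhost_dev, select the
 * IOTLB v2 message format, probe the usable IOVA range and optionally
 * allocate shadow virtqueues. Only the first vhost_dev of the device
 * (index 0) sets the ACKNOWLEDGE and DRIVER status bits;
 * vhost_vdpa_one_time_request() filters out the rest.
 */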
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);
    int ret;

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    ret = vhost_vdpa_init_svq(dev, v, errp);
    if (ret) {
        goto err;
    }

    vhost_vdpa_get_iova_range(v);

    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;

err:
    ram_block_discard_disable(false);
    return ret;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size;
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size;
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            goto err;
        }
    }

    return;

err:
    vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
    return;
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    if (!v->shadow_vqs) {
        return;
    }

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}
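/*
 * Backend cleanup: undo vhost_vdpa_init(). Unmaps the host notifiers,
 * unregisters the memory listener, frees the shadow virtqueues and
 * re-enables the RAM discarding that init disabled.
 */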
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;
    ram_block_discard_disable(false);

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    int ret;

    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_one_time_request(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
{
    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        vhost_svq_stop(svq);
    }
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    vhost_vdpa_reset_svq(v);

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;
    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}
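/*
 * Hexdump a device config blob to the trace backend, 16 bytes per line.
 * Callers gate this on both the set/get config trace event and the
 * dump_config trace event being enabled.
 */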
static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}
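/*
 * The *_dev_* helpers above always program the real vdpa device. The
 * vhost_vdpa_set_vring_{addr,base,kick,call} callbacks further down either
 * forward to them or, when shadow virtqueues are enabled, leave the request
 * to the SVQ layer.
 */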
/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        return false;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
    }

    return r == 0;
}

static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            error_reportf_err(err, "Cannot setup SVQ %u: ", i);
            return false;
        }
    }

    return true;
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}
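/*
 * As with vhost_vdpa_set_vring_addr() above: when shadow virtqueues are in
 * use, the device vring base was already programmed at device start and the
 * SVQ base is handled by the VirtQueue code, so the guest request is a
 * no-op here.
 */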
static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        int vdpa_idx = file->index - dev->vq_index;
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

        vhost_svq_set_svq_call_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_call(dev, file);
    }
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
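/*
 * Backend ops for vhost-vdpa. Callbacks left NULL are features this backend
 * does not provide (shared-memory dirty log, vring endianness, the IOTLB
 * callback API, etc.); the generic vhost layer is expected to check for
 * NULL before invoking them.
 */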
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};