1108a6481SCindy Lu /* 2108a6481SCindy Lu * vhost-vdpa 3108a6481SCindy Lu * 4108a6481SCindy Lu * Copyright(c) 2017-2018 Intel Corporation. 5108a6481SCindy Lu * Copyright(c) 2020 Red Hat, Inc. 6108a6481SCindy Lu * 7108a6481SCindy Lu * This work is licensed under the terms of the GNU GPL, version 2 or later. 8108a6481SCindy Lu * See the COPYING file in the top-level directory. 9108a6481SCindy Lu * 10108a6481SCindy Lu */ 11108a6481SCindy Lu 12108a6481SCindy Lu #include "qemu/osdep.h" 13108a6481SCindy Lu #include <linux/vhost.h> 14108a6481SCindy Lu #include <linux/vfio.h> 15108a6481SCindy Lu #include <sys/eventfd.h> 16108a6481SCindy Lu #include <sys/ioctl.h> 17108a6481SCindy Lu #include "hw/virtio/vhost.h" 18108a6481SCindy Lu #include "hw/virtio/vhost-backend.h" 19108a6481SCindy Lu #include "hw/virtio/virtio-net.h" 20108a6481SCindy Lu #include "hw/virtio/vhost-vdpa.h" 21df77d45aSXie Yongji #include "exec/address-spaces.h" 22108a6481SCindy Lu #include "qemu/main-loop.h" 234dc5acc0SCindy Lu #include "cpu.h" 24778e67deSLaurent Vivier #include "trace.h" 25778e67deSLaurent Vivier #include "qemu-common.h" 26108a6481SCindy Lu 27032e4d68SEugenio Pérez /* 28032e4d68SEugenio Pérez * Return one past the end of the end of section. Be careful with uint64_t 29032e4d68SEugenio Pérez * conversions! 30032e4d68SEugenio Pérez */ 31032e4d68SEugenio Pérez static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) 32032e4d68SEugenio Pérez { 33032e4d68SEugenio Pérez Int128 llend = int128_make64(section->offset_within_address_space); 34032e4d68SEugenio Pérez llend = int128_add(llend, section->size); 35032e4d68SEugenio Pérez llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 36032e4d68SEugenio Pérez 37032e4d68SEugenio Pérez return llend; 38032e4d68SEugenio Pérez } 39032e4d68SEugenio Pérez 40*013108b6SEugenio Pérez static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, 41*013108b6SEugenio Pérez uint64_t iova_min, 42*013108b6SEugenio Pérez uint64_t iova_max) 43108a6481SCindy Lu { 44*013108b6SEugenio Pérez Int128 llend; 45*013108b6SEugenio Pérez 46*013108b6SEugenio Pérez if ((!memory_region_is_ram(section->mr) && 47108a6481SCindy Lu !memory_region_is_iommu(section->mr)) || 48c64038c9SEugenio Pérez memory_region_is_protected(section->mr) || 49d60c75d2SJason Wang /* vhost-vDPA doesn't allow MMIO to be mapped */ 50*013108b6SEugenio Pérez memory_region_is_ram_device(section->mr)) { 51*013108b6SEugenio Pérez return true; 52*013108b6SEugenio Pérez } 53*013108b6SEugenio Pérez 54*013108b6SEugenio Pérez if (section->offset_within_address_space < iova_min) { 55*013108b6SEugenio Pérez error_report("RAM section out of device range (min=0x%" PRIx64 56*013108b6SEugenio Pérez ", addr=0x%" HWADDR_PRIx ")", 57*013108b6SEugenio Pérez iova_min, section->offset_within_address_space); 58*013108b6SEugenio Pérez return true; 59*013108b6SEugenio Pérez } 60*013108b6SEugenio Pérez 61*013108b6SEugenio Pérez llend = vhost_vdpa_section_end(section); 62*013108b6SEugenio Pérez if (int128_gt(llend, int128_make64(iova_max))) { 63*013108b6SEugenio Pérez error_report("RAM section out of device range (max=0x%" PRIx64 64*013108b6SEugenio Pérez ", end addr=0x%" PRIx64 ")", 65*013108b6SEugenio Pérez iova_max, int128_get64(llend)); 66*013108b6SEugenio Pérez return true; 67*013108b6SEugenio Pérez } 68*013108b6SEugenio Pérez 69*013108b6SEugenio Pérez return false; 70108a6481SCindy Lu } 71108a6481SCindy Lu 72108a6481SCindy Lu static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, 73108a6481SCindy Lu void *vaddr, bool readonly) 74108a6481SCindy Lu { 75386494f2SCindy Lu struct vhost_msg_v2 msg = {}; 76108a6481SCindy Lu int fd = v->device_fd; 77108a6481SCindy Lu int ret = 0; 78108a6481SCindy Lu 79108a6481SCindy Lu msg.type = v->msg_type; 80108a6481SCindy Lu msg.iotlb.iova = iova; 81108a6481SCindy Lu msg.iotlb.size = size; 82108a6481SCindy Lu msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; 83108a6481SCindy Lu msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; 84108a6481SCindy Lu msg.iotlb.type = VHOST_IOTLB_UPDATE; 85108a6481SCindy Lu 86778e67deSLaurent Vivier trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, 87778e67deSLaurent Vivier msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); 88778e67deSLaurent Vivier 89108a6481SCindy Lu if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 90108a6481SCindy Lu error_report("failed to write, fd=%d, errno=%d (%s)", 91108a6481SCindy Lu fd, errno, strerror(errno)); 92108a6481SCindy Lu return -EIO ; 93108a6481SCindy Lu } 94108a6481SCindy Lu 95108a6481SCindy Lu return ret; 96108a6481SCindy Lu } 97108a6481SCindy Lu 98108a6481SCindy Lu static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, 99108a6481SCindy Lu hwaddr size) 100108a6481SCindy Lu { 101386494f2SCindy Lu struct vhost_msg_v2 msg = {}; 102108a6481SCindy Lu int fd = v->device_fd; 103108a6481SCindy Lu int ret = 0; 104108a6481SCindy Lu 105108a6481SCindy Lu msg.type = v->msg_type; 106108a6481SCindy Lu msg.iotlb.iova = iova; 107108a6481SCindy Lu msg.iotlb.size = size; 108108a6481SCindy Lu msg.iotlb.type = VHOST_IOTLB_INVALIDATE; 109108a6481SCindy Lu 110778e67deSLaurent Vivier trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, 111778e67deSLaurent Vivier msg.iotlb.size, msg.iotlb.type); 112778e67deSLaurent Vivier 113108a6481SCindy Lu if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 114108a6481SCindy Lu error_report("failed to write, fd=%d, errno=%d (%s)", 115108a6481SCindy Lu fd, errno, strerror(errno)); 116108a6481SCindy Lu return -EIO ; 117108a6481SCindy Lu } 118108a6481SCindy Lu 119108a6481SCindy Lu return ret; 120108a6481SCindy Lu } 121108a6481SCindy Lu 122e6db5df7SEugenio Pérez static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) 123a5bd0580SJason Wang { 124a5bd0580SJason Wang int fd = v->device_fd; 125e6db5df7SEugenio Pérez struct vhost_msg_v2 msg = { 126e6db5df7SEugenio Pérez .type = v->msg_type, 127e6db5df7SEugenio Pérez .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, 128e6db5df7SEugenio Pérez }; 129a5bd0580SJason Wang 130a5bd0580SJason Wang if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 131a5bd0580SJason Wang error_report("failed to write, fd=%d, errno=%d (%s)", 132a5bd0580SJason Wang fd, errno, strerror(errno)); 133a5bd0580SJason Wang } 134a5bd0580SJason Wang } 135a5bd0580SJason Wang 136e6db5df7SEugenio Pérez static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) 137e6db5df7SEugenio Pérez { 138e6db5df7SEugenio Pérez if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && 139e6db5df7SEugenio Pérez !v->iotlb_batch_begin_sent) { 140e6db5df7SEugenio Pérez vhost_vdpa_listener_begin_batch(v); 141e6db5df7SEugenio Pérez } 142e6db5df7SEugenio Pérez 143e6db5df7SEugenio Pérez v->iotlb_batch_begin_sent = true; 144e6db5df7SEugenio Pérez } 145e6db5df7SEugenio Pérez 146a5bd0580SJason Wang static void vhost_vdpa_listener_commit(MemoryListener *listener) 147a5bd0580SJason Wang { 148a5bd0580SJason Wang struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 149a5bd0580SJason Wang struct vhost_dev *dev = v->dev; 1508acb3218SPhilippe Mathieu-Daudé struct vhost_msg_v2 msg = {}; 151a5bd0580SJason Wang int fd = v->device_fd; 152a5bd0580SJason Wang 153a5bd0580SJason Wang if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { 154a5bd0580SJason Wang return; 155a5bd0580SJason Wang } 156a5bd0580SJason Wang 157e6db5df7SEugenio Pérez if (!v->iotlb_batch_begin_sent) { 158e6db5df7SEugenio Pérez return; 159e6db5df7SEugenio Pérez } 160e6db5df7SEugenio Pérez 161a5bd0580SJason Wang msg.type = v->msg_type; 162a5bd0580SJason Wang msg.iotlb.type = VHOST_IOTLB_BATCH_END; 163a5bd0580SJason Wang 164a5bd0580SJason Wang if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 165a5bd0580SJason Wang error_report("failed to write, fd=%d, errno=%d (%s)", 166a5bd0580SJason Wang fd, errno, strerror(errno)); 167a5bd0580SJason Wang } 168e6db5df7SEugenio Pérez 169e6db5df7SEugenio Pérez v->iotlb_batch_begin_sent = false; 170a5bd0580SJason Wang } 171a5bd0580SJason Wang 172108a6481SCindy Lu static void vhost_vdpa_listener_region_add(MemoryListener *listener, 173108a6481SCindy Lu MemoryRegionSection *section) 174108a6481SCindy Lu { 175108a6481SCindy Lu struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 176108a6481SCindy Lu hwaddr iova; 177108a6481SCindy Lu Int128 llend, llsize; 178108a6481SCindy Lu void *vaddr; 179108a6481SCindy Lu int ret; 180108a6481SCindy Lu 181*013108b6SEugenio Pérez if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, 182*013108b6SEugenio Pérez v->iova_range.last)) { 183108a6481SCindy Lu return; 184108a6481SCindy Lu } 185108a6481SCindy Lu 186108a6481SCindy Lu if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 187108a6481SCindy Lu (section->offset_within_region & ~TARGET_PAGE_MASK))) { 188108a6481SCindy Lu error_report("%s received unaligned region", __func__); 189108a6481SCindy Lu return; 190108a6481SCindy Lu } 191108a6481SCindy Lu 192108a6481SCindy Lu iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 193032e4d68SEugenio Pérez llend = vhost_vdpa_section_end(section); 194108a6481SCindy Lu if (int128_ge(int128_make64(iova), llend)) { 195108a6481SCindy Lu return; 196108a6481SCindy Lu } 197108a6481SCindy Lu 198108a6481SCindy Lu memory_region_ref(section->mr); 199108a6481SCindy Lu 200108a6481SCindy Lu /* Here we assume that memory_region_is_ram(section->mr)==true */ 201108a6481SCindy Lu 202108a6481SCindy Lu vaddr = memory_region_get_ram_ptr(section->mr) + 203108a6481SCindy Lu section->offset_within_region + 204108a6481SCindy Lu (iova - section->offset_within_address_space); 205108a6481SCindy Lu 206778e67deSLaurent Vivier trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), 207778e67deSLaurent Vivier vaddr, section->readonly); 208778e67deSLaurent Vivier 209108a6481SCindy Lu llsize = int128_sub(llend, int128_make64(iova)); 210108a6481SCindy Lu 211e6db5df7SEugenio Pérez vhost_vdpa_iotlb_batch_begin_once(v); 212108a6481SCindy Lu ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), 213108a6481SCindy Lu vaddr, section->readonly); 214108a6481SCindy Lu if (ret) { 215108a6481SCindy Lu error_report("vhost vdpa map fail!"); 216108a6481SCindy Lu goto fail; 217108a6481SCindy Lu } 218108a6481SCindy Lu 219108a6481SCindy Lu return; 220108a6481SCindy Lu 221108a6481SCindy Lu fail: 222108a6481SCindy Lu /* 223108a6481SCindy Lu * On the initfn path, store the first error in the container so we 224108a6481SCindy Lu * can gracefully fail. Runtime, there's not much we can do other 225108a6481SCindy Lu * than throw a hardware error. 226108a6481SCindy Lu */ 227108a6481SCindy Lu error_report("vhost-vdpa: DMA mapping failed, unable to continue"); 228108a6481SCindy Lu return; 229108a6481SCindy Lu 230108a6481SCindy Lu } 231108a6481SCindy Lu 232108a6481SCindy Lu static void vhost_vdpa_listener_region_del(MemoryListener *listener, 233108a6481SCindy Lu MemoryRegionSection *section) 234108a6481SCindy Lu { 235108a6481SCindy Lu struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 236108a6481SCindy Lu hwaddr iova; 237108a6481SCindy Lu Int128 llend, llsize; 238108a6481SCindy Lu int ret; 239108a6481SCindy Lu 240*013108b6SEugenio Pérez if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, 241*013108b6SEugenio Pérez v->iova_range.last)) { 242108a6481SCindy Lu return; 243108a6481SCindy Lu } 244108a6481SCindy Lu 245108a6481SCindy Lu if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 246108a6481SCindy Lu (section->offset_within_region & ~TARGET_PAGE_MASK))) { 247108a6481SCindy Lu error_report("%s received unaligned region", __func__); 248108a6481SCindy Lu return; 249108a6481SCindy Lu } 250108a6481SCindy Lu 251108a6481SCindy Lu iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 252032e4d68SEugenio Pérez llend = vhost_vdpa_section_end(section); 253108a6481SCindy Lu 254778e67deSLaurent Vivier trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); 255778e67deSLaurent Vivier 256108a6481SCindy Lu if (int128_ge(int128_make64(iova), llend)) { 257108a6481SCindy Lu return; 258108a6481SCindy Lu } 259108a6481SCindy Lu 260108a6481SCindy Lu llsize = int128_sub(llend, int128_make64(iova)); 261108a6481SCindy Lu 262e6db5df7SEugenio Pérez vhost_vdpa_iotlb_batch_begin_once(v); 263108a6481SCindy Lu ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); 264108a6481SCindy Lu if (ret) { 265108a6481SCindy Lu error_report("vhost_vdpa dma unmap error!"); 266108a6481SCindy Lu } 267108a6481SCindy Lu 268108a6481SCindy Lu memory_region_unref(section->mr); 269108a6481SCindy Lu } 270108a6481SCindy Lu /* 271108a6481SCindy Lu * IOTLB API is used by vhost-vpda which requires incremental updating 272108a6481SCindy Lu * of the mapping. So we can not use generic vhost memory listener which 273108a6481SCindy Lu * depends on the addnop(). 274108a6481SCindy Lu */ 275108a6481SCindy Lu static const MemoryListener vhost_vdpa_memory_listener = { 276142518bdSPeter Xu .name = "vhost-vdpa", 277a5bd0580SJason Wang .commit = vhost_vdpa_listener_commit, 278108a6481SCindy Lu .region_add = vhost_vdpa_listener_region_add, 279108a6481SCindy Lu .region_del = vhost_vdpa_listener_region_del, 280108a6481SCindy Lu }; 281108a6481SCindy Lu 282108a6481SCindy Lu static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, 283108a6481SCindy Lu void *arg) 284108a6481SCindy Lu { 285108a6481SCindy Lu struct vhost_vdpa *v = dev->opaque; 286108a6481SCindy Lu int fd = v->device_fd; 287f2a6e6c4SKevin Wolf int ret; 288108a6481SCindy Lu 289108a6481SCindy Lu assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 290108a6481SCindy Lu 291f2a6e6c4SKevin Wolf ret = ioctl(fd, request, arg); 292f2a6e6c4SKevin Wolf return ret < 0 ? -errno : ret; 293108a6481SCindy Lu } 294108a6481SCindy Lu 295108a6481SCindy Lu static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) 296108a6481SCindy Lu { 297108a6481SCindy Lu uint8_t s; 298108a6481SCindy Lu 299778e67deSLaurent Vivier trace_vhost_vdpa_add_status(dev, status); 300108a6481SCindy Lu if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) { 301108a6481SCindy Lu return; 302108a6481SCindy Lu } 303108a6481SCindy Lu 304108a6481SCindy Lu s |= status; 305108a6481SCindy Lu 306108a6481SCindy Lu vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); 307108a6481SCindy Lu } 308108a6481SCindy Lu 309*013108b6SEugenio Pérez static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) 310*013108b6SEugenio Pérez { 311*013108b6SEugenio Pérez int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, 312*013108b6SEugenio Pérez &v->iova_range); 313*013108b6SEugenio Pérez if (ret != 0) { 314*013108b6SEugenio Pérez v->iova_range.first = 0; 315*013108b6SEugenio Pérez v->iova_range.last = UINT64_MAX; 316*013108b6SEugenio Pérez } 317*013108b6SEugenio Pérez 318*013108b6SEugenio Pérez trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, 319*013108b6SEugenio Pérez v->iova_range.last); 320*013108b6SEugenio Pérez } 321*013108b6SEugenio Pérez 32228770ff9SKevin Wolf static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) 323108a6481SCindy Lu { 324108a6481SCindy Lu struct vhost_vdpa *v; 325108a6481SCindy Lu assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 326778e67deSLaurent Vivier trace_vhost_vdpa_init(dev, opaque); 327108a6481SCindy Lu 328108a6481SCindy Lu v = opaque; 329a5bd0580SJason Wang v->dev = dev; 330108a6481SCindy Lu dev->opaque = opaque ; 331108a6481SCindy Lu v->listener = vhost_vdpa_memory_listener; 332108a6481SCindy Lu v->msg_type = VHOST_IOTLB_MSG_V2; 333108a6481SCindy Lu 334*013108b6SEugenio Pérez vhost_vdpa_get_iova_range(v); 335108a6481SCindy Lu vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 336108a6481SCindy Lu VIRTIO_CONFIG_S_DRIVER); 337108a6481SCindy Lu 338108a6481SCindy Lu return 0; 339108a6481SCindy Lu } 340108a6481SCindy Lu 341d0416d48SJason Wang static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, 342d0416d48SJason Wang int queue_index) 343d0416d48SJason Wang { 344d0416d48SJason Wang size_t page_size = qemu_real_host_page_size; 345d0416d48SJason Wang struct vhost_vdpa *v = dev->opaque; 346d0416d48SJason Wang VirtIODevice *vdev = dev->vdev; 347d0416d48SJason Wang VhostVDPAHostNotifier *n; 348d0416d48SJason Wang 349d0416d48SJason Wang n = &v->notifier[queue_index]; 350d0416d48SJason Wang 351d0416d48SJason Wang if (n->addr) { 352d0416d48SJason Wang virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false); 353d0416d48SJason Wang object_unparent(OBJECT(&n->mr)); 354d0416d48SJason Wang munmap(n->addr, page_size); 355d0416d48SJason Wang n->addr = NULL; 356d0416d48SJason Wang } 357d0416d48SJason Wang } 358d0416d48SJason Wang 359d0416d48SJason Wang static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) 360d0416d48SJason Wang { 361d0416d48SJason Wang int i; 362d0416d48SJason Wang 363d0416d48SJason Wang for (i = 0; i < n; i++) { 364d0416d48SJason Wang vhost_vdpa_host_notifier_uninit(dev, i); 365d0416d48SJason Wang } 366d0416d48SJason Wang } 367d0416d48SJason Wang 368d0416d48SJason Wang static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) 369d0416d48SJason Wang { 370d0416d48SJason Wang size_t page_size = qemu_real_host_page_size; 371d0416d48SJason Wang struct vhost_vdpa *v = dev->opaque; 372d0416d48SJason Wang VirtIODevice *vdev = dev->vdev; 373d0416d48SJason Wang VhostVDPAHostNotifier *n; 374d0416d48SJason Wang int fd = v->device_fd; 375d0416d48SJason Wang void *addr; 376d0416d48SJason Wang char *name; 377d0416d48SJason Wang 378d0416d48SJason Wang vhost_vdpa_host_notifier_uninit(dev, queue_index); 379d0416d48SJason Wang 380d0416d48SJason Wang n = &v->notifier[queue_index]; 381d0416d48SJason Wang 382d0416d48SJason Wang addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 383d0416d48SJason Wang queue_index * page_size); 384d0416d48SJason Wang if (addr == MAP_FAILED) { 385d0416d48SJason Wang goto err; 386d0416d48SJason Wang } 387d0416d48SJason Wang 388d0416d48SJason Wang name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]", 389d0416d48SJason Wang v, queue_index); 390d0416d48SJason Wang memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 391d0416d48SJason Wang page_size, addr); 392d0416d48SJason Wang g_free(name); 393d0416d48SJason Wang 394d0416d48SJason Wang if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { 395d0416d48SJason Wang munmap(addr, page_size); 396d0416d48SJason Wang goto err; 397d0416d48SJason Wang } 398d0416d48SJason Wang n->addr = addr; 399d0416d48SJason Wang 400d0416d48SJason Wang return 0; 401d0416d48SJason Wang 402d0416d48SJason Wang err: 403d0416d48SJason Wang return -1; 404d0416d48SJason Wang } 405d0416d48SJason Wang 406d0416d48SJason Wang static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) 407d0416d48SJason Wang { 408d0416d48SJason Wang int i; 409d0416d48SJason Wang 410d0416d48SJason Wang for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { 411d0416d48SJason Wang if (vhost_vdpa_host_notifier_init(dev, i)) { 412d0416d48SJason Wang goto err; 413d0416d48SJason Wang } 414d0416d48SJason Wang } 415d0416d48SJason Wang 416d0416d48SJason Wang return; 417d0416d48SJason Wang 418d0416d48SJason Wang err: 419d0416d48SJason Wang vhost_vdpa_host_notifiers_uninit(dev, i); 420d0416d48SJason Wang return; 421d0416d48SJason Wang } 422d0416d48SJason Wang 423108a6481SCindy Lu static int vhost_vdpa_cleanup(struct vhost_dev *dev) 424108a6481SCindy Lu { 425108a6481SCindy Lu struct vhost_vdpa *v; 426108a6481SCindy Lu assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 427108a6481SCindy Lu v = dev->opaque; 428778e67deSLaurent Vivier trace_vhost_vdpa_cleanup(dev, v); 429d0416d48SJason Wang vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); 430108a6481SCindy Lu memory_listener_unregister(&v->listener); 431108a6481SCindy Lu 432108a6481SCindy Lu dev->opaque = NULL; 433108a6481SCindy Lu return 0; 434108a6481SCindy Lu } 435108a6481SCindy Lu 436108a6481SCindy Lu static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) 437108a6481SCindy Lu { 438778e67deSLaurent Vivier trace_vhost_vdpa_memslots_limit(dev, INT_MAX); 439108a6481SCindy Lu return INT_MAX; 440108a6481SCindy Lu } 441108a6481SCindy Lu 442108a6481SCindy Lu static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, 443108a6481SCindy Lu struct vhost_memory *mem) 444108a6481SCindy Lu { 445778e67deSLaurent Vivier trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); 446778e67deSLaurent Vivier if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && 447778e67deSLaurent Vivier trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { 448778e67deSLaurent Vivier int i; 449778e67deSLaurent Vivier for (i = 0; i < mem->nregions; i++) { 450778e67deSLaurent Vivier trace_vhost_vdpa_dump_regions(dev, i, 451778e67deSLaurent Vivier mem->regions[i].guest_phys_addr, 452778e67deSLaurent Vivier mem->regions[i].memory_size, 453778e67deSLaurent Vivier mem->regions[i].userspace_addr, 454778e67deSLaurent Vivier mem->regions[i].flags_padding); 455778e67deSLaurent Vivier } 456778e67deSLaurent Vivier } 457108a6481SCindy Lu if (mem->padding) { 458108a6481SCindy Lu return -1; 459108a6481SCindy Lu } 460108a6481SCindy Lu 461108a6481SCindy Lu return 0; 462108a6481SCindy Lu } 463108a6481SCindy Lu 464108a6481SCindy Lu static int vhost_vdpa_set_features(struct vhost_dev *dev, 465108a6481SCindy Lu uint64_t features) 466108a6481SCindy Lu { 467108a6481SCindy Lu int ret; 468778e67deSLaurent Vivier trace_vhost_vdpa_set_features(dev, features); 469108a6481SCindy Lu ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); 470108a6481SCindy Lu uint8_t status = 0; 471108a6481SCindy Lu if (ret) { 472108a6481SCindy Lu return ret; 473108a6481SCindy Lu } 474108a6481SCindy Lu vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 475108a6481SCindy Lu vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); 476108a6481SCindy Lu 477108a6481SCindy Lu return !(status & VIRTIO_CONFIG_S_FEATURES_OK); 478108a6481SCindy Lu } 479108a6481SCindy Lu 480a5bd0580SJason Wang static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) 481a5bd0580SJason Wang { 482a5bd0580SJason Wang uint64_t features; 483a5bd0580SJason Wang uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 484a5bd0580SJason Wang 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; 485a5bd0580SJason Wang int r; 486a5bd0580SJason Wang 487a5bd0580SJason Wang if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { 4882a83e97eSJason Wang return -EFAULT; 489a5bd0580SJason Wang } 490a5bd0580SJason Wang 491a5bd0580SJason Wang features &= f; 492a5bd0580SJason Wang r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); 493a5bd0580SJason Wang if (r) { 4942a83e97eSJason Wang return -EFAULT; 495a5bd0580SJason Wang } 496a5bd0580SJason Wang 497a5bd0580SJason Wang dev->backend_cap = features; 498a5bd0580SJason Wang 499a5bd0580SJason Wang return 0; 500a5bd0580SJason Wang } 501a5bd0580SJason Wang 502c232b8f4SZenghui Yu static int vhost_vdpa_get_device_id(struct vhost_dev *dev, 503108a6481SCindy Lu uint32_t *device_id) 504108a6481SCindy Lu { 505778e67deSLaurent Vivier int ret; 506778e67deSLaurent Vivier ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); 507778e67deSLaurent Vivier trace_vhost_vdpa_get_device_id(dev, *device_id); 508778e67deSLaurent Vivier return ret; 509108a6481SCindy Lu } 510108a6481SCindy Lu 511108a6481SCindy Lu static int vhost_vdpa_reset_device(struct vhost_dev *dev) 512108a6481SCindy Lu { 513778e67deSLaurent Vivier int ret; 514108a6481SCindy Lu uint8_t status = 0; 515108a6481SCindy Lu 516778e67deSLaurent Vivier ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); 517778e67deSLaurent Vivier trace_vhost_vdpa_reset_device(dev, status); 518778e67deSLaurent Vivier return ret; 519108a6481SCindy Lu } 520108a6481SCindy Lu 521108a6481SCindy Lu static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) 522108a6481SCindy Lu { 523108a6481SCindy Lu assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 524108a6481SCindy Lu 525778e67deSLaurent Vivier trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index); 526108a6481SCindy Lu return idx - dev->vq_index; 527108a6481SCindy Lu } 528108a6481SCindy Lu 529108a6481SCindy Lu static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) 530108a6481SCindy Lu { 531108a6481SCindy Lu int i; 532778e67deSLaurent Vivier trace_vhost_vdpa_set_vring_ready(dev); 533108a6481SCindy Lu for (i = 0; i < dev->nvqs; ++i) { 534108a6481SCindy Lu struct vhost_vring_state state = { 535108a6481SCindy Lu .index = dev->vq_index + i, 536108a6481SCindy Lu .num = 1, 537108a6481SCindy Lu }; 538108a6481SCindy Lu vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); 539108a6481SCindy Lu } 540108a6481SCindy Lu return 0; 541108a6481SCindy Lu } 542108a6481SCindy Lu 543778e67deSLaurent Vivier static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, 544778e67deSLaurent Vivier uint32_t config_len) 545778e67deSLaurent Vivier { 546778e67deSLaurent Vivier int b, len; 547778e67deSLaurent Vivier char line[QEMU_HEXDUMP_LINE_LEN]; 548778e67deSLaurent Vivier 549778e67deSLaurent Vivier for (b = 0; b < config_len; b += 16) { 550778e67deSLaurent Vivier len = config_len - b; 551778e67deSLaurent Vivier qemu_hexdump_line(line, b, config, len, false); 552778e67deSLaurent Vivier trace_vhost_vdpa_dump_config(dev, line); 553778e67deSLaurent Vivier } 554778e67deSLaurent Vivier } 555778e67deSLaurent Vivier 556108a6481SCindy Lu static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, 557108a6481SCindy Lu uint32_t offset, uint32_t size, 558108a6481SCindy Lu uint32_t flags) 559108a6481SCindy Lu { 560108a6481SCindy Lu struct vhost_vdpa_config *config; 561108a6481SCindy Lu int ret; 562108a6481SCindy Lu unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 563986d4f78SLi Qiang 564778e67deSLaurent Vivier trace_vhost_vdpa_set_config(dev, offset, size, flags); 565108a6481SCindy Lu config = g_malloc(size + config_size); 566108a6481SCindy Lu config->off = offset; 567108a6481SCindy Lu config->len = size; 568108a6481SCindy Lu memcpy(config->buf, data, size); 569778e67deSLaurent Vivier if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) && 570778e67deSLaurent Vivier trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 571778e67deSLaurent Vivier vhost_vdpa_dump_config(dev, data, size); 572778e67deSLaurent Vivier } 573108a6481SCindy Lu ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config); 574108a6481SCindy Lu g_free(config); 575108a6481SCindy Lu return ret; 576108a6481SCindy Lu } 577108a6481SCindy Lu 578108a6481SCindy Lu static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, 57950de5138SKevin Wolf uint32_t config_len, Error **errp) 580108a6481SCindy Lu { 581108a6481SCindy Lu struct vhost_vdpa_config *v_config; 582108a6481SCindy Lu unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 583108a6481SCindy Lu int ret; 584108a6481SCindy Lu 585778e67deSLaurent Vivier trace_vhost_vdpa_get_config(dev, config, config_len); 586108a6481SCindy Lu v_config = g_malloc(config_len + config_size); 587108a6481SCindy Lu v_config->len = config_len; 588108a6481SCindy Lu v_config->off = 0; 589108a6481SCindy Lu ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config); 590108a6481SCindy Lu memcpy(config, v_config->buf, config_len); 591108a6481SCindy Lu g_free(v_config); 592778e67deSLaurent Vivier if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) && 593778e67deSLaurent Vivier trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 594778e67deSLaurent Vivier vhost_vdpa_dump_config(dev, config, config_len); 595778e67deSLaurent Vivier } 596108a6481SCindy Lu return ret; 597108a6481SCindy Lu } 598108a6481SCindy Lu 599108a6481SCindy Lu static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) 600108a6481SCindy Lu { 601108a6481SCindy Lu struct vhost_vdpa *v = dev->opaque; 602778e67deSLaurent Vivier trace_vhost_vdpa_dev_start(dev, started); 603108a6481SCindy Lu if (started) { 604108a6481SCindy Lu uint8_t status = 0; 605108a6481SCindy Lu memory_listener_register(&v->listener, &address_space_memory); 606d0416d48SJason Wang vhost_vdpa_host_notifiers_init(dev); 607108a6481SCindy Lu vhost_vdpa_set_vring_ready(dev); 608108a6481SCindy Lu vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 609108a6481SCindy Lu vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); 610108a6481SCindy Lu 611108a6481SCindy Lu return !(status & VIRTIO_CONFIG_S_DRIVER_OK); 612108a6481SCindy Lu } else { 613108a6481SCindy Lu vhost_vdpa_reset_device(dev); 614108a6481SCindy Lu vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 615108a6481SCindy Lu VIRTIO_CONFIG_S_DRIVER); 616d0416d48SJason Wang vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); 617108a6481SCindy Lu memory_listener_unregister(&v->listener); 618108a6481SCindy Lu 619108a6481SCindy Lu return 0; 620108a6481SCindy Lu } 621108a6481SCindy Lu } 622108a6481SCindy Lu 623108a6481SCindy Lu static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, 624108a6481SCindy Lu struct vhost_log *log) 625108a6481SCindy Lu { 626778e67deSLaurent Vivier trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, 627778e67deSLaurent Vivier log->log); 628108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); 629108a6481SCindy Lu } 630108a6481SCindy Lu 631108a6481SCindy Lu static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, 632108a6481SCindy Lu struct vhost_vring_addr *addr) 633108a6481SCindy Lu { 634778e67deSLaurent Vivier trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, 635778e67deSLaurent Vivier addr->desc_user_addr, addr->used_user_addr, 636778e67deSLaurent Vivier addr->avail_user_addr, 637778e67deSLaurent Vivier addr->log_guest_addr); 638108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); 639108a6481SCindy Lu } 640108a6481SCindy Lu 641108a6481SCindy Lu static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, 642108a6481SCindy Lu struct vhost_vring_state *ring) 643108a6481SCindy Lu { 644778e67deSLaurent Vivier trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num); 645108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); 646108a6481SCindy Lu } 647108a6481SCindy Lu 648108a6481SCindy Lu static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, 649108a6481SCindy Lu struct vhost_vring_state *ring) 650108a6481SCindy Lu { 651778e67deSLaurent Vivier trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); 652108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); 653108a6481SCindy Lu } 654108a6481SCindy Lu 655108a6481SCindy Lu static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, 656108a6481SCindy Lu struct vhost_vring_state *ring) 657108a6481SCindy Lu { 658778e67deSLaurent Vivier int ret; 659778e67deSLaurent Vivier 660778e67deSLaurent Vivier ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); 661778e67deSLaurent Vivier trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); 662778e67deSLaurent Vivier return ret; 663108a6481SCindy Lu } 664108a6481SCindy Lu 665108a6481SCindy Lu static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, 666108a6481SCindy Lu struct vhost_vring_file *file) 667108a6481SCindy Lu { 668778e67deSLaurent Vivier trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); 669108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); 670108a6481SCindy Lu } 671108a6481SCindy Lu 672108a6481SCindy Lu static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, 673108a6481SCindy Lu struct vhost_vring_file *file) 674108a6481SCindy Lu { 675778e67deSLaurent Vivier trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); 676108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); 677108a6481SCindy Lu } 678108a6481SCindy Lu 679108a6481SCindy Lu static int vhost_vdpa_get_features(struct vhost_dev *dev, 680108a6481SCindy Lu uint64_t *features) 681108a6481SCindy Lu { 682778e67deSLaurent Vivier int ret; 683778e67deSLaurent Vivier 684778e67deSLaurent Vivier ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); 685778e67deSLaurent Vivier trace_vhost_vdpa_get_features(dev, *features); 686778e67deSLaurent Vivier return ret; 687108a6481SCindy Lu } 688108a6481SCindy Lu 689108a6481SCindy Lu static int vhost_vdpa_set_owner(struct vhost_dev *dev) 690108a6481SCindy Lu { 691778e67deSLaurent Vivier trace_vhost_vdpa_set_owner(dev); 692108a6481SCindy Lu return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); 693108a6481SCindy Lu } 694108a6481SCindy Lu 695108a6481SCindy Lu static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, 696108a6481SCindy Lu struct vhost_vring_addr *addr, struct vhost_virtqueue *vq) 697108a6481SCindy Lu { 698108a6481SCindy Lu assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 699108a6481SCindy Lu addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys; 700108a6481SCindy Lu addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys; 701108a6481SCindy Lu addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys; 702778e67deSLaurent Vivier trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr, 703778e67deSLaurent Vivier addr->avail_user_addr, addr->used_user_addr); 704108a6481SCindy Lu return 0; 705108a6481SCindy Lu } 706108a6481SCindy Lu 707108a6481SCindy Lu static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) 708108a6481SCindy Lu { 709108a6481SCindy Lu return true; 710108a6481SCindy Lu } 711108a6481SCindy Lu 712108a6481SCindy Lu const VhostOps vdpa_ops = { 713108a6481SCindy Lu .backend_type = VHOST_BACKEND_TYPE_VDPA, 714108a6481SCindy Lu .vhost_backend_init = vhost_vdpa_init, 715108a6481SCindy Lu .vhost_backend_cleanup = vhost_vdpa_cleanup, 716108a6481SCindy Lu .vhost_set_log_base = vhost_vdpa_set_log_base, 717108a6481SCindy Lu .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, 718108a6481SCindy Lu .vhost_set_vring_num = vhost_vdpa_set_vring_num, 719108a6481SCindy Lu .vhost_set_vring_base = vhost_vdpa_set_vring_base, 720108a6481SCindy Lu .vhost_get_vring_base = vhost_vdpa_get_vring_base, 721108a6481SCindy Lu .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, 722108a6481SCindy Lu .vhost_set_vring_call = vhost_vdpa_set_vring_call, 723108a6481SCindy Lu .vhost_get_features = vhost_vdpa_get_features, 724a5bd0580SJason Wang .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, 725108a6481SCindy Lu .vhost_set_owner = vhost_vdpa_set_owner, 726108a6481SCindy Lu .vhost_set_vring_endian = NULL, 727108a6481SCindy Lu .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, 728108a6481SCindy Lu .vhost_set_mem_table = vhost_vdpa_set_mem_table, 729108a6481SCindy Lu .vhost_set_features = vhost_vdpa_set_features, 730108a6481SCindy Lu .vhost_reset_device = vhost_vdpa_reset_device, 731108a6481SCindy Lu .vhost_get_vq_index = vhost_vdpa_get_vq_index, 732108a6481SCindy Lu .vhost_get_config = vhost_vdpa_get_config, 733108a6481SCindy Lu .vhost_set_config = vhost_vdpa_set_config, 734108a6481SCindy Lu .vhost_requires_shm_log = NULL, 735108a6481SCindy Lu .vhost_migration_done = NULL, 736108a6481SCindy Lu .vhost_backend_can_merge = NULL, 737108a6481SCindy Lu .vhost_net_set_mtu = NULL, 738108a6481SCindy Lu .vhost_set_iotlb_callback = NULL, 739108a6481SCindy Lu .vhost_send_device_iotlb_msg = NULL, 740108a6481SCindy Lu .vhost_dev_start = vhost_vdpa_dev_start, 741108a6481SCindy Lu .vhost_get_device_id = vhost_vdpa_get_device_id, 742108a6481SCindy Lu .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, 743108a6481SCindy Lu .vhost_force_iommu = vhost_vdpa_force_iommu, 744108a6481SCindy Lu }; 745