19fca2b7dSJohn Levon /* 29fca2b7dSJohn Levon * Container for vfio-user IOMMU type: rather than communicating with the kernel 39fca2b7dSJohn Levon * vfio driver, we communicate over a socket to a server using the vfio-user 49fca2b7dSJohn Levon * protocol. 59fca2b7dSJohn Levon * 69fca2b7dSJohn Levon * SPDX-License-Identifier: GPL-2.0-or-later 79fca2b7dSJohn Levon */ 89fca2b7dSJohn Levon 99fca2b7dSJohn Levon #include <sys/ioctl.h> 109fca2b7dSJohn Levon #include <linux/vfio.h> 119fca2b7dSJohn Levon #include "qemu/osdep.h" 129fca2b7dSJohn Levon 139fca2b7dSJohn Levon #include "hw/vfio-user/container.h" 143bdb738bSJohn Levon #include "hw/vfio-user/device.h" 15*18e899e6SJohn Levon #include "hw/vfio-user/trace.h" 169fca2b7dSJohn Levon #include "hw/vfio/vfio-cpr.h" 179fca2b7dSJohn Levon #include "hw/vfio/vfio-device.h" 189fca2b7dSJohn Levon #include "hw/vfio/vfio-listener.h" 199fca2b7dSJohn Levon #include "qapi/error.h" 209fca2b7dSJohn Levon 21*18e899e6SJohn Levon /* 22*18e899e6SJohn Levon * When DMA space is the physical address space, the region add/del listeners 23*18e899e6SJohn Levon * will fire during memory update transactions. These depend on BQL being held, 24*18e899e6SJohn Levon * so do any resulting map/demap ops async while keeping BQL. 25*18e899e6SJohn Levon */ 26*18e899e6SJohn Levon static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) 27*18e899e6SJohn Levon { 28*18e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 29*18e899e6SJohn Levon bcontainer); 30*18e899e6SJohn Levon 31*18e899e6SJohn Levon container->proxy->async_ops = true; 32*18e899e6SJohn Levon } 33*18e899e6SJohn Levon 34*18e899e6SJohn Levon static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) 35*18e899e6SJohn Levon { 36*18e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 37*18e899e6SJohn Levon bcontainer); 38*18e899e6SJohn Levon 39*18e899e6SJohn Levon /* wait here for any async requests sent during the transaction */ 40*18e899e6SJohn Levon container->proxy->async_ops = false; 41*18e899e6SJohn Levon vfio_user_wait_reqs(container->proxy); 42*18e899e6SJohn Levon } 43*18e899e6SJohn Levon 449fca2b7dSJohn Levon static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, 459fca2b7dSJohn Levon hwaddr iova, ram_addr_t size, 469fca2b7dSJohn Levon IOMMUTLBEntry *iotlb, bool unmap_all) 479fca2b7dSJohn Levon { 48*18e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 49*18e899e6SJohn Levon bcontainer); 50*18e899e6SJohn Levon Error *local_err = NULL; 51*18e899e6SJohn Levon int ret = 0; 52*18e899e6SJohn Levon 53*18e899e6SJohn Levon VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp)); 54*18e899e6SJohn Levon 55*18e899e6SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0); 56*18e899e6SJohn Levon msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap); 57*18e899e6SJohn Levon msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0; 58*18e899e6SJohn Levon msgp->iova = iova; 59*18e899e6SJohn Levon msgp->size = size; 60*18e899e6SJohn Levon trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags, 61*18e899e6SJohn Levon container->proxy->async_ops); 62*18e899e6SJohn Levon 63*18e899e6SJohn Levon if (container->proxy->async_ops) { 64*18e899e6SJohn Levon if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL, 65*18e899e6SJohn Levon 0, &local_err)) { 66*18e899e6SJohn Levon error_report_err(local_err); 67*18e899e6SJohn Levon ret = -EFAULT; 68*18e899e6SJohn Levon } else { 69*18e899e6SJohn Levon ret = 0; 70*18e899e6SJohn Levon } 71*18e899e6SJohn Levon } else { 72*18e899e6SJohn Levon if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL, 73*18e899e6SJohn Levon 0, &local_err)) { 74*18e899e6SJohn Levon error_report_err(local_err); 75*18e899e6SJohn Levon ret = -EFAULT; 76*18e899e6SJohn Levon } 77*18e899e6SJohn Levon 78*18e899e6SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 79*18e899e6SJohn Levon ret = -msgp->hdr.error_reply; 80*18e899e6SJohn Levon } 81*18e899e6SJohn Levon 82*18e899e6SJohn Levon g_free(msgp); 83*18e899e6SJohn Levon } 84*18e899e6SJohn Levon 85*18e899e6SJohn Levon return ret; 869fca2b7dSJohn Levon } 879fca2b7dSJohn Levon 889fca2b7dSJohn Levon static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, 899fca2b7dSJohn Levon ram_addr_t size, void *vaddr, bool readonly, 909fca2b7dSJohn Levon MemoryRegion *mrp) 919fca2b7dSJohn Levon { 92*18e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 93*18e899e6SJohn Levon bcontainer); 94*18e899e6SJohn Levon int fd = memory_region_get_fd(mrp); 95*18e899e6SJohn Levon Error *local_err = NULL; 96*18e899e6SJohn Levon int ret; 97*18e899e6SJohn Levon 98*18e899e6SJohn Levon VFIOUserFDs *fds = NULL; 99*18e899e6SJohn Levon VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp)); 100*18e899e6SJohn Levon 101*18e899e6SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0); 102*18e899e6SJohn Levon msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map); 103*18e899e6SJohn Levon msgp->flags = VFIO_DMA_MAP_FLAG_READ; 104*18e899e6SJohn Levon msgp->offset = 0; 105*18e899e6SJohn Levon msgp->iova = iova; 106*18e899e6SJohn Levon msgp->size = size; 107*18e899e6SJohn Levon 108*18e899e6SJohn Levon /* 109*18e899e6SJohn Levon * vaddr enters as a QEMU process address; make it either a file offset 110*18e899e6SJohn Levon * for mapped areas or leave as 0. 111*18e899e6SJohn Levon */ 112*18e899e6SJohn Levon if (fd != -1) { 113*18e899e6SJohn Levon msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr); 114*18e899e6SJohn Levon } 115*18e899e6SJohn Levon 116*18e899e6SJohn Levon if (!readonly) { 117*18e899e6SJohn Levon msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE; 118*18e899e6SJohn Levon } 119*18e899e6SJohn Levon 120*18e899e6SJohn Levon trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags, 121*18e899e6SJohn Levon container->proxy->async_ops); 122*18e899e6SJohn Levon 123*18e899e6SJohn Levon /* 124*18e899e6SJohn Levon * The async_ops case sends without blocking. They're later waited for in 125*18e899e6SJohn Levon * vfio_send_wait_reqs. 126*18e899e6SJohn Levon */ 127*18e899e6SJohn Levon if (container->proxy->async_ops) { 128*18e899e6SJohn Levon /* can't use auto variable since we don't block */ 129*18e899e6SJohn Levon if (fd != -1) { 130*18e899e6SJohn Levon fds = vfio_user_getfds(1); 131*18e899e6SJohn Levon fds->send_fds = 1; 132*18e899e6SJohn Levon fds->fds[0] = fd; 133*18e899e6SJohn Levon } 134*18e899e6SJohn Levon 135*18e899e6SJohn Levon if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds, 136*18e899e6SJohn Levon 0, &local_err)) { 137*18e899e6SJohn Levon error_report_err(local_err); 138*18e899e6SJohn Levon ret = -EFAULT; 139*18e899e6SJohn Levon } else { 140*18e899e6SJohn Levon ret = 0; 141*18e899e6SJohn Levon } 142*18e899e6SJohn Levon } else { 143*18e899e6SJohn Levon VFIOUserFDs local_fds = { 1, 0, &fd }; 144*18e899e6SJohn Levon 145*18e899e6SJohn Levon fds = fd != -1 ? &local_fds : NULL; 146*18e899e6SJohn Levon 147*18e899e6SJohn Levon if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds, 148*18e899e6SJohn Levon 0, &local_err)) { 149*18e899e6SJohn Levon error_report_err(local_err); 150*18e899e6SJohn Levon ret = -EFAULT; 151*18e899e6SJohn Levon } 152*18e899e6SJohn Levon 153*18e899e6SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 154*18e899e6SJohn Levon ret = -msgp->hdr.error_reply; 155*18e899e6SJohn Levon } 156*18e899e6SJohn Levon 157*18e899e6SJohn Levon g_free(msgp); 158*18e899e6SJohn Levon } 159*18e899e6SJohn Levon 160*18e899e6SJohn Levon return ret; 1619fca2b7dSJohn Levon } 1629fca2b7dSJohn Levon 1639fca2b7dSJohn Levon static int 1649fca2b7dSJohn Levon vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, 1659fca2b7dSJohn Levon bool start, Error **errp) 1669fca2b7dSJohn Levon { 1679fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported"); 1689fca2b7dSJohn Levon return -ENOTSUP; 1699fca2b7dSJohn Levon } 1709fca2b7dSJohn Levon 1719fca2b7dSJohn Levon static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, 1729fca2b7dSJohn Levon VFIOBitmap *vbmap, hwaddr iova, 1739fca2b7dSJohn Levon hwaddr size, Error **errp) 1749fca2b7dSJohn Levon { 1759fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported"); 1769fca2b7dSJohn Levon return -ENOTSUP; 1779fca2b7dSJohn Levon } 1789fca2b7dSJohn Levon 1799fca2b7dSJohn Levon static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) 1809fca2b7dSJohn Levon { 18152ce9c35SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 18252ce9c35SJohn Levon bcontainer); 18352ce9c35SJohn Levon 18452ce9c35SJohn Levon assert(container->proxy->dma_pgsizes != 0); 18552ce9c35SJohn Levon bcontainer->pgsizes = container->proxy->dma_pgsizes; 18652ce9c35SJohn Levon bcontainer->dma_max_mappings = container->proxy->max_dma; 18752ce9c35SJohn Levon 18852ce9c35SJohn Levon /* No live migration support yet. */ 18952ce9c35SJohn Levon bcontainer->dirty_pages_supported = false; 19052ce9c35SJohn Levon bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap; 19152ce9c35SJohn Levon bcontainer->dirty_pgsizes = container->proxy->migr_pgsize; 19252ce9c35SJohn Levon 19352ce9c35SJohn Levon return true; 1949fca2b7dSJohn Levon } 1959fca2b7dSJohn Levon 19652ce9c35SJohn Levon static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev, 19752ce9c35SJohn Levon Error **errp) 1989fca2b7dSJohn Levon { 1999fca2b7dSJohn Levon VFIOUserContainer *container; 2009fca2b7dSJohn Levon 2019fca2b7dSJohn Levon container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER)); 20252ce9c35SJohn Levon container->proxy = vbasedev->proxy; 2039fca2b7dSJohn Levon return container; 2049fca2b7dSJohn Levon } 2059fca2b7dSJohn Levon 2069fca2b7dSJohn Levon /* 2079fca2b7dSJohn Levon * Try to mirror vfio_container_connect() as much as possible. 2089fca2b7dSJohn Levon */ 2099fca2b7dSJohn Levon static VFIOUserContainer * 21052ce9c35SJohn Levon vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, 21152ce9c35SJohn Levon Error **errp) 2129fca2b7dSJohn Levon { 2139fca2b7dSJohn Levon VFIOContainerBase *bcontainer; 2149fca2b7dSJohn Levon VFIOUserContainer *container; 2159fca2b7dSJohn Levon VFIOAddressSpace *space; 2169fca2b7dSJohn Levon VFIOIOMMUClass *vioc; 21752ce9c35SJohn Levon int ret; 2189fca2b7dSJohn Levon 2199fca2b7dSJohn Levon space = vfio_address_space_get(as); 2209fca2b7dSJohn Levon 22152ce9c35SJohn Levon container = vfio_user_create_container(vbasedev, errp); 2229fca2b7dSJohn Levon if (!container) { 2239fca2b7dSJohn Levon goto put_space_exit; 2249fca2b7dSJohn Levon } 2259fca2b7dSJohn Levon 2269fca2b7dSJohn Levon bcontainer = &container->bcontainer; 2279fca2b7dSJohn Levon 2289fca2b7dSJohn Levon if (!vfio_cpr_register_container(bcontainer, errp)) { 2299fca2b7dSJohn Levon goto free_container_exit; 2309fca2b7dSJohn Levon } 2319fca2b7dSJohn Levon 23252ce9c35SJohn Levon ret = ram_block_uncoordinated_discard_disable(true); 23352ce9c35SJohn Levon if (ret) { 23452ce9c35SJohn Levon error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); 23552ce9c35SJohn Levon goto unregister_container_exit; 23652ce9c35SJohn Levon } 23752ce9c35SJohn Levon 2389fca2b7dSJohn Levon vioc = VFIO_IOMMU_GET_CLASS(bcontainer); 2399fca2b7dSJohn Levon assert(vioc->setup); 2409fca2b7dSJohn Levon 2419fca2b7dSJohn Levon if (!vioc->setup(bcontainer, errp)) { 24252ce9c35SJohn Levon goto enable_discards_exit; 2439fca2b7dSJohn Levon } 2449fca2b7dSJohn Levon 2459fca2b7dSJohn Levon vfio_address_space_insert(space, bcontainer); 2469fca2b7dSJohn Levon 2479fca2b7dSJohn Levon if (!vfio_listener_register(bcontainer, errp)) { 2489fca2b7dSJohn Levon goto listener_release_exit; 2499fca2b7dSJohn Levon } 2509fca2b7dSJohn Levon 2519fca2b7dSJohn Levon bcontainer->initialized = true; 2529fca2b7dSJohn Levon 2539fca2b7dSJohn Levon return container; 2549fca2b7dSJohn Levon 2559fca2b7dSJohn Levon listener_release_exit: 2569fca2b7dSJohn Levon vfio_listener_unregister(bcontainer); 2579fca2b7dSJohn Levon if (vioc->release) { 2589fca2b7dSJohn Levon vioc->release(bcontainer); 2599fca2b7dSJohn Levon } 2609fca2b7dSJohn Levon 26152ce9c35SJohn Levon enable_discards_exit: 26252ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false); 26352ce9c35SJohn Levon 2649fca2b7dSJohn Levon unregister_container_exit: 2659fca2b7dSJohn Levon vfio_cpr_unregister_container(bcontainer); 2669fca2b7dSJohn Levon 2679fca2b7dSJohn Levon free_container_exit: 2689fca2b7dSJohn Levon object_unref(container); 2699fca2b7dSJohn Levon 2709fca2b7dSJohn Levon put_space_exit: 2719fca2b7dSJohn Levon vfio_address_space_put(space); 2729fca2b7dSJohn Levon 2739fca2b7dSJohn Levon return NULL; 2749fca2b7dSJohn Levon } 2759fca2b7dSJohn Levon 2769fca2b7dSJohn Levon static void vfio_user_container_disconnect(VFIOUserContainer *container) 2779fca2b7dSJohn Levon { 2789fca2b7dSJohn Levon VFIOContainerBase *bcontainer = &container->bcontainer; 2799fca2b7dSJohn Levon VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); 28052ce9c35SJohn Levon VFIOAddressSpace *space = bcontainer->space; 28152ce9c35SJohn Levon 28252ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false); 2839fca2b7dSJohn Levon 2849fca2b7dSJohn Levon vfio_listener_unregister(bcontainer); 2859fca2b7dSJohn Levon if (vioc->release) { 2869fca2b7dSJohn Levon vioc->release(bcontainer); 2879fca2b7dSJohn Levon } 2889fca2b7dSJohn Levon 2899fca2b7dSJohn Levon vfio_cpr_unregister_container(bcontainer); 2909fca2b7dSJohn Levon object_unref(container); 2919fca2b7dSJohn Levon 2929fca2b7dSJohn Levon vfio_address_space_put(space); 2939fca2b7dSJohn Levon } 2949fca2b7dSJohn Levon 2959fca2b7dSJohn Levon static bool vfio_user_device_get(VFIOUserContainer *container, 2969fca2b7dSJohn Levon VFIODevice *vbasedev, Error **errp) 2979fca2b7dSJohn Levon { 2983bdb738bSJohn Levon struct vfio_device_info info = { .argsz = sizeof(info) }; 2993bdb738bSJohn Levon 3003bdb738bSJohn Levon 3013bdb738bSJohn Levon if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) { 3023bdb738bSJohn Levon return false; 3033bdb738bSJohn Levon } 3049fca2b7dSJohn Levon 3059fca2b7dSJohn Levon vbasedev->fd = -1; 3069fca2b7dSJohn Levon 3079fca2b7dSJohn Levon vfio_device_prepare(vbasedev, &container->bcontainer, &info); 3089fca2b7dSJohn Levon 3099fca2b7dSJohn Levon return true; 3109fca2b7dSJohn Levon } 3119fca2b7dSJohn Levon 3129fca2b7dSJohn Levon /* 3139fca2b7dSJohn Levon * vfio_user_device_attach: attach a device to a new container. 3149fca2b7dSJohn Levon */ 3159fca2b7dSJohn Levon static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, 3169fca2b7dSJohn Levon AddressSpace *as, Error **errp) 3179fca2b7dSJohn Levon { 3189fca2b7dSJohn Levon VFIOUserContainer *container; 3199fca2b7dSJohn Levon 32052ce9c35SJohn Levon container = vfio_user_container_connect(as, vbasedev, errp); 3219fca2b7dSJohn Levon if (container == NULL) { 3229fca2b7dSJohn Levon error_prepend(errp, "failed to connect proxy"); 3239fca2b7dSJohn Levon return false; 3249fca2b7dSJohn Levon } 3259fca2b7dSJohn Levon 3269fca2b7dSJohn Levon return vfio_user_device_get(container, vbasedev, errp); 3279fca2b7dSJohn Levon } 3289fca2b7dSJohn Levon 3299fca2b7dSJohn Levon static void vfio_user_device_detach(VFIODevice *vbasedev) 3309fca2b7dSJohn Levon { 3319fca2b7dSJohn Levon VFIOUserContainer *container = container_of(vbasedev->bcontainer, 3329fca2b7dSJohn Levon VFIOUserContainer, bcontainer); 3339fca2b7dSJohn Levon 3349fca2b7dSJohn Levon vfio_device_unprepare(vbasedev); 3359fca2b7dSJohn Levon 3369fca2b7dSJohn Levon vfio_user_container_disconnect(container); 3379fca2b7dSJohn Levon } 3389fca2b7dSJohn Levon 3399fca2b7dSJohn Levon static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single) 3409fca2b7dSJohn Levon { 3419fca2b7dSJohn Levon /* ->needs_reset is always false for vfio-user. */ 3429fca2b7dSJohn Levon return 0; 3439fca2b7dSJohn Levon } 3449fca2b7dSJohn Levon 3459fca2b7dSJohn Levon static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data) 3469fca2b7dSJohn Levon { 3479fca2b7dSJohn Levon VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); 3489fca2b7dSJohn Levon 3499fca2b7dSJohn Levon vioc->setup = vfio_user_setup; 350*18e899e6SJohn Levon vioc->listener_begin = vfio_user_listener_begin, 351*18e899e6SJohn Levon vioc->listener_commit = vfio_user_listener_commit, 3529fca2b7dSJohn Levon vioc->dma_map = vfio_user_dma_map; 3539fca2b7dSJohn Levon vioc->dma_unmap = vfio_user_dma_unmap; 3549fca2b7dSJohn Levon vioc->attach_device = vfio_user_device_attach; 3559fca2b7dSJohn Levon vioc->detach_device = vfio_user_device_detach; 3569fca2b7dSJohn Levon vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking; 3579fca2b7dSJohn Levon vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap; 3589fca2b7dSJohn Levon vioc->pci_hot_reset = vfio_user_pci_hot_reset; 3599fca2b7dSJohn Levon }; 3609fca2b7dSJohn Levon 3619fca2b7dSJohn Levon static const TypeInfo types[] = { 3629fca2b7dSJohn Levon { 3639fca2b7dSJohn Levon .name = TYPE_VFIO_IOMMU_USER, 3649fca2b7dSJohn Levon .parent = TYPE_VFIO_IOMMU, 3659fca2b7dSJohn Levon .instance_size = sizeof(VFIOUserContainer), 3669fca2b7dSJohn Levon .class_init = vfio_iommu_user_class_init, 3679fca2b7dSJohn Levon }, 3689fca2b7dSJohn Levon }; 3699fca2b7dSJohn Levon 3709fca2b7dSJohn Levon DEFINE_TYPES(types) 371