19fca2b7dSJohn Levon /* 29fca2b7dSJohn Levon * Container for vfio-user IOMMU type: rather than communicating with the kernel 39fca2b7dSJohn Levon * vfio driver, we communicate over a socket to a server using the vfio-user 49fca2b7dSJohn Levon * protocol. 59fca2b7dSJohn Levon * 69fca2b7dSJohn Levon * SPDX-License-Identifier: GPL-2.0-or-later 79fca2b7dSJohn Levon */ 89fca2b7dSJohn Levon 99fca2b7dSJohn Levon #include <sys/ioctl.h> 109fca2b7dSJohn Levon #include <linux/vfio.h> 119fca2b7dSJohn Levon #include "qemu/osdep.h" 129fca2b7dSJohn Levon 139fca2b7dSJohn Levon #include "hw/vfio-user/container.h" 143bdb738bSJohn Levon #include "hw/vfio-user/device.h" 1518e899e6SJohn Levon #include "hw/vfio-user/trace.h" 169fca2b7dSJohn Levon #include "hw/vfio/vfio-device.h" 179fca2b7dSJohn Levon #include "hw/vfio/vfio-listener.h" 189fca2b7dSJohn Levon #include "qapi/error.h" 199fca2b7dSJohn Levon 2018e899e6SJohn Levon /* 2118e899e6SJohn Levon * When DMA space is the physical address space, the region add/del listeners 2218e899e6SJohn Levon * will fire during memory update transactions. These depend on BQL being held, 2318e899e6SJohn Levon * so do any resulting map/demap ops async while keeping BQL. 2418e899e6SJohn Levon */ 2518e899e6SJohn Levon static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) 2618e899e6SJohn Levon { 2718e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 2818e899e6SJohn Levon bcontainer); 2918e899e6SJohn Levon 3018e899e6SJohn Levon container->proxy->async_ops = true; 3118e899e6SJohn Levon } 3218e899e6SJohn Levon 3318e899e6SJohn Levon static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) 3418e899e6SJohn Levon { 3518e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 3618e899e6SJohn Levon bcontainer); 3718e899e6SJohn Levon 3818e899e6SJohn Levon /* wait here for any async requests sent during the transaction */ 3918e899e6SJohn Levon container->proxy->async_ops = false; 4018e899e6SJohn Levon vfio_user_wait_reqs(container->proxy); 4118e899e6SJohn Levon } 4218e899e6SJohn Levon 439fca2b7dSJohn Levon static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, 449fca2b7dSJohn Levon hwaddr iova, ram_addr_t size, 459fca2b7dSJohn Levon IOMMUTLBEntry *iotlb, bool unmap_all) 469fca2b7dSJohn Levon { 4718e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 4818e899e6SJohn Levon bcontainer); 4918e899e6SJohn Levon Error *local_err = NULL; 5018e899e6SJohn Levon int ret = 0; 5118e899e6SJohn Levon 5218e899e6SJohn Levon VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp)); 5318e899e6SJohn Levon 5418e899e6SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0); 5518e899e6SJohn Levon msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap); 5618e899e6SJohn Levon msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0; 5718e899e6SJohn Levon msgp->iova = iova; 5818e899e6SJohn Levon msgp->size = size; 5918e899e6SJohn Levon trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags, 6018e899e6SJohn Levon container->proxy->async_ops); 6118e899e6SJohn Levon 6218e899e6SJohn Levon if (container->proxy->async_ops) { 6318e899e6SJohn Levon if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL, 6418e899e6SJohn Levon 0, &local_err)) { 6518e899e6SJohn Levon error_report_err(local_err); 6618e899e6SJohn Levon ret = -EFAULT; 6718e899e6SJohn Levon } else { 6818e899e6SJohn Levon ret = 0; 6918e899e6SJohn Levon } 7018e899e6SJohn Levon } else { 7118e899e6SJohn Levon if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL, 7218e899e6SJohn Levon 0, &local_err)) { 7318e899e6SJohn Levon error_report_err(local_err); 7418e899e6SJohn Levon ret = -EFAULT; 7518e899e6SJohn Levon } 7618e899e6SJohn Levon 7718e899e6SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 7818e899e6SJohn Levon ret = -msgp->hdr.error_reply; 7918e899e6SJohn Levon } 8018e899e6SJohn Levon 8118e899e6SJohn Levon g_free(msgp); 8218e899e6SJohn Levon } 8318e899e6SJohn Levon 8418e899e6SJohn Levon return ret; 859fca2b7dSJohn Levon } 869fca2b7dSJohn Levon 879fca2b7dSJohn Levon static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, 889fca2b7dSJohn Levon ram_addr_t size, void *vaddr, bool readonly, 899fca2b7dSJohn Levon MemoryRegion *mrp) 909fca2b7dSJohn Levon { 9118e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 9218e899e6SJohn Levon bcontainer); 9318e899e6SJohn Levon int fd = memory_region_get_fd(mrp); 9418e899e6SJohn Levon Error *local_err = NULL; 9518e899e6SJohn Levon int ret; 9618e899e6SJohn Levon 9718e899e6SJohn Levon VFIOUserFDs *fds = NULL; 9818e899e6SJohn Levon VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp)); 9918e899e6SJohn Levon 10018e899e6SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0); 10118e899e6SJohn Levon msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map); 10218e899e6SJohn Levon msgp->flags = VFIO_DMA_MAP_FLAG_READ; 10318e899e6SJohn Levon msgp->offset = 0; 10418e899e6SJohn Levon msgp->iova = iova; 10518e899e6SJohn Levon msgp->size = size; 10618e899e6SJohn Levon 10718e899e6SJohn Levon /* 10818e899e6SJohn Levon * vaddr enters as a QEMU process address; make it either a file offset 10918e899e6SJohn Levon * for mapped areas or leave as 0. 11018e899e6SJohn Levon */ 11118e899e6SJohn Levon if (fd != -1) { 11218e899e6SJohn Levon msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr); 11318e899e6SJohn Levon } 11418e899e6SJohn Levon 11518e899e6SJohn Levon if (!readonly) { 11618e899e6SJohn Levon msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE; 11718e899e6SJohn Levon } 11818e899e6SJohn Levon 11918e899e6SJohn Levon trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags, 12018e899e6SJohn Levon container->proxy->async_ops); 12118e899e6SJohn Levon 12218e899e6SJohn Levon /* 12318e899e6SJohn Levon * The async_ops case sends without blocking. They're later waited for in 12418e899e6SJohn Levon * vfio_send_wait_reqs. 12518e899e6SJohn Levon */ 12618e899e6SJohn Levon if (container->proxy->async_ops) { 12718e899e6SJohn Levon /* can't use auto variable since we don't block */ 12818e899e6SJohn Levon if (fd != -1) { 12918e899e6SJohn Levon fds = vfio_user_getfds(1); 13018e899e6SJohn Levon fds->send_fds = 1; 13118e899e6SJohn Levon fds->fds[0] = fd; 13218e899e6SJohn Levon } 13318e899e6SJohn Levon 13418e899e6SJohn Levon if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds, 13518e899e6SJohn Levon 0, &local_err)) { 13618e899e6SJohn Levon error_report_err(local_err); 13718e899e6SJohn Levon ret = -EFAULT; 13818e899e6SJohn Levon } else { 13918e899e6SJohn Levon ret = 0; 14018e899e6SJohn Levon } 14118e899e6SJohn Levon } else { 14218e899e6SJohn Levon VFIOUserFDs local_fds = { 1, 0, &fd }; 14318e899e6SJohn Levon 14418e899e6SJohn Levon fds = fd != -1 ? &local_fds : NULL; 14518e899e6SJohn Levon 14618e899e6SJohn Levon if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds, 14718e899e6SJohn Levon 0, &local_err)) { 14818e899e6SJohn Levon error_report_err(local_err); 14918e899e6SJohn Levon ret = -EFAULT; 15018e899e6SJohn Levon } 15118e899e6SJohn Levon 15218e899e6SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 15318e899e6SJohn Levon ret = -msgp->hdr.error_reply; 15418e899e6SJohn Levon } 15518e899e6SJohn Levon 15618e899e6SJohn Levon g_free(msgp); 15718e899e6SJohn Levon } 15818e899e6SJohn Levon 15918e899e6SJohn Levon return ret; 1609fca2b7dSJohn Levon } 1619fca2b7dSJohn Levon 1629fca2b7dSJohn Levon static int 1639fca2b7dSJohn Levon vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, 1649fca2b7dSJohn Levon bool start, Error **errp) 1659fca2b7dSJohn Levon { 1669fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported"); 1679fca2b7dSJohn Levon return -ENOTSUP; 1689fca2b7dSJohn Levon } 1699fca2b7dSJohn Levon 1709fca2b7dSJohn Levon static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, 1719fca2b7dSJohn Levon VFIOBitmap *vbmap, hwaddr iova, 1729fca2b7dSJohn Levon hwaddr size, Error **errp) 1739fca2b7dSJohn Levon { 1749fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported"); 1759fca2b7dSJohn Levon return -ENOTSUP; 1769fca2b7dSJohn Levon } 1779fca2b7dSJohn Levon 1789fca2b7dSJohn Levon static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) 1799fca2b7dSJohn Levon { 18052ce9c35SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 18152ce9c35SJohn Levon bcontainer); 18252ce9c35SJohn Levon 18352ce9c35SJohn Levon assert(container->proxy->dma_pgsizes != 0); 18452ce9c35SJohn Levon bcontainer->pgsizes = container->proxy->dma_pgsizes; 18552ce9c35SJohn Levon bcontainer->dma_max_mappings = container->proxy->max_dma; 18652ce9c35SJohn Levon 18752ce9c35SJohn Levon /* No live migration support yet. */ 18852ce9c35SJohn Levon bcontainer->dirty_pages_supported = false; 18952ce9c35SJohn Levon bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap; 19052ce9c35SJohn Levon bcontainer->dirty_pgsizes = container->proxy->migr_pgsize; 19152ce9c35SJohn Levon 19252ce9c35SJohn Levon return true; 1939fca2b7dSJohn Levon } 1949fca2b7dSJohn Levon 19552ce9c35SJohn Levon static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev, 19652ce9c35SJohn Levon Error **errp) 1979fca2b7dSJohn Levon { 1989fca2b7dSJohn Levon VFIOUserContainer *container; 1999fca2b7dSJohn Levon 2009fca2b7dSJohn Levon container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER)); 20152ce9c35SJohn Levon container->proxy = vbasedev->proxy; 2029fca2b7dSJohn Levon return container; 2039fca2b7dSJohn Levon } 2049fca2b7dSJohn Levon 2059fca2b7dSJohn Levon /* 2069fca2b7dSJohn Levon * Try to mirror vfio_container_connect() as much as possible. 2079fca2b7dSJohn Levon */ 2089fca2b7dSJohn Levon static VFIOUserContainer * 20952ce9c35SJohn Levon vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, 21052ce9c35SJohn Levon Error **errp) 2119fca2b7dSJohn Levon { 2129fca2b7dSJohn Levon VFIOContainerBase *bcontainer; 2139fca2b7dSJohn Levon VFIOUserContainer *container; 2149fca2b7dSJohn Levon VFIOAddressSpace *space; 2159fca2b7dSJohn Levon VFIOIOMMUClass *vioc; 21652ce9c35SJohn Levon int ret; 2179fca2b7dSJohn Levon 2189fca2b7dSJohn Levon space = vfio_address_space_get(as); 2199fca2b7dSJohn Levon 22052ce9c35SJohn Levon container = vfio_user_create_container(vbasedev, errp); 2219fca2b7dSJohn Levon if (!container) { 2229fca2b7dSJohn Levon goto put_space_exit; 2239fca2b7dSJohn Levon } 2249fca2b7dSJohn Levon 2259fca2b7dSJohn Levon bcontainer = &container->bcontainer; 2269fca2b7dSJohn Levon 22752ce9c35SJohn Levon ret = ram_block_uncoordinated_discard_disable(true); 22852ce9c35SJohn Levon if (ret) { 22952ce9c35SJohn Levon error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); 230*f7c5dff2SMark Cave-Ayland goto free_container_exit; 23152ce9c35SJohn Levon } 23252ce9c35SJohn Levon 2339fca2b7dSJohn Levon vioc = VFIO_IOMMU_GET_CLASS(bcontainer); 2349fca2b7dSJohn Levon assert(vioc->setup); 2359fca2b7dSJohn Levon 2369fca2b7dSJohn Levon if (!vioc->setup(bcontainer, errp)) { 23752ce9c35SJohn Levon goto enable_discards_exit; 2389fca2b7dSJohn Levon } 2399fca2b7dSJohn Levon 2409fca2b7dSJohn Levon vfio_address_space_insert(space, bcontainer); 2419fca2b7dSJohn Levon 2429fca2b7dSJohn Levon if (!vfio_listener_register(bcontainer, errp)) { 2439fca2b7dSJohn Levon goto listener_release_exit; 2449fca2b7dSJohn Levon } 2459fca2b7dSJohn Levon 2469fca2b7dSJohn Levon bcontainer->initialized = true; 2479fca2b7dSJohn Levon 2489fca2b7dSJohn Levon return container; 2499fca2b7dSJohn Levon 2509fca2b7dSJohn Levon listener_release_exit: 2519fca2b7dSJohn Levon vfio_listener_unregister(bcontainer); 2529fca2b7dSJohn Levon if (vioc->release) { 2539fca2b7dSJohn Levon vioc->release(bcontainer); 2549fca2b7dSJohn Levon } 2559fca2b7dSJohn Levon 25652ce9c35SJohn Levon enable_discards_exit: 25752ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false); 25852ce9c35SJohn Levon 2599fca2b7dSJohn Levon free_container_exit: 2609fca2b7dSJohn Levon object_unref(container); 2619fca2b7dSJohn Levon 2629fca2b7dSJohn Levon put_space_exit: 2639fca2b7dSJohn Levon vfio_address_space_put(space); 2649fca2b7dSJohn Levon 2659fca2b7dSJohn Levon return NULL; 2669fca2b7dSJohn Levon } 2679fca2b7dSJohn Levon 2689fca2b7dSJohn Levon static void vfio_user_container_disconnect(VFIOUserContainer *container) 2699fca2b7dSJohn Levon { 2709fca2b7dSJohn Levon VFIOContainerBase *bcontainer = &container->bcontainer; 2719fca2b7dSJohn Levon VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); 27252ce9c35SJohn Levon VFIOAddressSpace *space = bcontainer->space; 27352ce9c35SJohn Levon 27452ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false); 2759fca2b7dSJohn Levon 2769fca2b7dSJohn Levon vfio_listener_unregister(bcontainer); 2779fca2b7dSJohn Levon if (vioc->release) { 2789fca2b7dSJohn Levon vioc->release(bcontainer); 2799fca2b7dSJohn Levon } 2809fca2b7dSJohn Levon 2819fca2b7dSJohn Levon object_unref(container); 2829fca2b7dSJohn Levon 2839fca2b7dSJohn Levon vfio_address_space_put(space); 2849fca2b7dSJohn Levon } 2859fca2b7dSJohn Levon 2869fca2b7dSJohn Levon static bool vfio_user_device_get(VFIOUserContainer *container, 2879fca2b7dSJohn Levon VFIODevice *vbasedev, Error **errp) 2889fca2b7dSJohn Levon { 2893bdb738bSJohn Levon struct vfio_device_info info = { .argsz = sizeof(info) }; 2903bdb738bSJohn Levon 2913bdb738bSJohn Levon 2923bdb738bSJohn Levon if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) { 2933bdb738bSJohn Levon return false; 2943bdb738bSJohn Levon } 2959fca2b7dSJohn Levon 2969fca2b7dSJohn Levon vbasedev->fd = -1; 2979fca2b7dSJohn Levon 2989fca2b7dSJohn Levon vfio_device_prepare(vbasedev, &container->bcontainer, &info); 2999fca2b7dSJohn Levon 3009fca2b7dSJohn Levon return true; 3019fca2b7dSJohn Levon } 3029fca2b7dSJohn Levon 3039fca2b7dSJohn Levon /* 3049fca2b7dSJohn Levon * vfio_user_device_attach: attach a device to a new container. 3059fca2b7dSJohn Levon */ 3069fca2b7dSJohn Levon static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, 3079fca2b7dSJohn Levon AddressSpace *as, Error **errp) 3089fca2b7dSJohn Levon { 3099fca2b7dSJohn Levon VFIOUserContainer *container; 3109fca2b7dSJohn Levon 31152ce9c35SJohn Levon container = vfio_user_container_connect(as, vbasedev, errp); 3129fca2b7dSJohn Levon if (container == NULL) { 3139fca2b7dSJohn Levon error_prepend(errp, "failed to connect proxy"); 3149fca2b7dSJohn Levon return false; 3159fca2b7dSJohn Levon } 3169fca2b7dSJohn Levon 3179fca2b7dSJohn Levon return vfio_user_device_get(container, vbasedev, errp); 3189fca2b7dSJohn Levon } 3199fca2b7dSJohn Levon 3209fca2b7dSJohn Levon static void vfio_user_device_detach(VFIODevice *vbasedev) 3219fca2b7dSJohn Levon { 3229fca2b7dSJohn Levon VFIOUserContainer *container = container_of(vbasedev->bcontainer, 3239fca2b7dSJohn Levon VFIOUserContainer, bcontainer); 3249fca2b7dSJohn Levon 3259fca2b7dSJohn Levon vfio_device_unprepare(vbasedev); 3269fca2b7dSJohn Levon 3279fca2b7dSJohn Levon vfio_user_container_disconnect(container); 3289fca2b7dSJohn Levon } 3299fca2b7dSJohn Levon 3309fca2b7dSJohn Levon static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single) 3319fca2b7dSJohn Levon { 3329fca2b7dSJohn Levon /* ->needs_reset is always false for vfio-user. */ 3339fca2b7dSJohn Levon return 0; 3349fca2b7dSJohn Levon } 3359fca2b7dSJohn Levon 3369fca2b7dSJohn Levon static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data) 3379fca2b7dSJohn Levon { 3389fca2b7dSJohn Levon VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); 3399fca2b7dSJohn Levon 3409fca2b7dSJohn Levon vioc->setup = vfio_user_setup; 34118e899e6SJohn Levon vioc->listener_begin = vfio_user_listener_begin, 34218e899e6SJohn Levon vioc->listener_commit = vfio_user_listener_commit, 3439fca2b7dSJohn Levon vioc->dma_map = vfio_user_dma_map; 3449fca2b7dSJohn Levon vioc->dma_unmap = vfio_user_dma_unmap; 3459fca2b7dSJohn Levon vioc->attach_device = vfio_user_device_attach; 3469fca2b7dSJohn Levon vioc->detach_device = vfio_user_device_detach; 3479fca2b7dSJohn Levon vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking; 3489fca2b7dSJohn Levon vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap; 3499fca2b7dSJohn Levon vioc->pci_hot_reset = vfio_user_pci_hot_reset; 3509fca2b7dSJohn Levon }; 3519fca2b7dSJohn Levon 3529fca2b7dSJohn Levon static const TypeInfo types[] = { 3539fca2b7dSJohn Levon { 3549fca2b7dSJohn Levon .name = TYPE_VFIO_IOMMU_USER, 3559fca2b7dSJohn Levon .parent = TYPE_VFIO_IOMMU, 3569fca2b7dSJohn Levon .instance_size = sizeof(VFIOUserContainer), 3579fca2b7dSJohn Levon .class_init = vfio_iommu_user_class_init, 3589fca2b7dSJohn Levon }, 3599fca2b7dSJohn Levon }; 3609fca2b7dSJohn Levon 3619fca2b7dSJohn Levon DEFINE_TYPES(types) 362