/*
 * Container for vfio-user IOMMU type: rather than communicating with the kernel
 * vfio driver, we communicate over a socket to a server using the vfio-user
 * protocol.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
89fca2b7dSJohn Levon
99fca2b7dSJohn Levon #include <sys/ioctl.h>
109fca2b7dSJohn Levon #include <linux/vfio.h>
119fca2b7dSJohn Levon #include "qemu/osdep.h"
129fca2b7dSJohn Levon
139fca2b7dSJohn Levon #include "hw/vfio-user/container.h"
143bdb738bSJohn Levon #include "hw/vfio-user/device.h"
1518e899e6SJohn Levon #include "hw/vfio-user/trace.h"
169fca2b7dSJohn Levon #include "hw/vfio/vfio-device.h"
179fca2b7dSJohn Levon #include "hw/vfio/vfio-listener.h"
189fca2b7dSJohn Levon #include "qapi/error.h"
199fca2b7dSJohn Levon
2018e899e6SJohn Levon /*
2118e899e6SJohn Levon * When DMA space is the physical address space, the region add/del listeners
2218e899e6SJohn Levon * will fire during memory update transactions. These depend on BQL being held,
2318e899e6SJohn Levon * so do any resulting map/demap ops async while keeping BQL.
2418e899e6SJohn Levon */
vfio_user_listener_begin(VFIOContainerBase * bcontainer)2518e899e6SJohn Levon static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
2618e899e6SJohn Levon {
2718e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
2818e899e6SJohn Levon bcontainer);
2918e899e6SJohn Levon
3018e899e6SJohn Levon container->proxy->async_ops = true;
3118e899e6SJohn Levon }
3218e899e6SJohn Levon
/*
 * Installed as the listener_commit hook: clear the proxy's async flag, then
 * wait for any async requests that were sent during the transaction.
 */
static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *ucontainer;

    ucontainer = container_of(bcontainer, VFIOUserContainer, bcontainer);

    /* The flag is cleared before waiting, matching the original ordering. */
    ucontainer->proxy->async_ops = false;
    vfio_user_wait_reqs(ucontainer->proxy);
}
4218e899e6SJohn Levon
/*
 * Send a VFIO_USER_DMA_UNMAP request for [iova, iova + size) to the server.
 *
 * @bcontainer: base container embedded in a VFIOUserContainer
 * @iova:       start of the range to unmap
 * @size:       length of the range to unmap
 * @iotlb:      unused by this implementation
 * @unmap_all:  when true, VFIO_DMA_UNMAP_FLAG_ALL is set in the request
 *
 * While the proxy is in async mode the request is sent without waiting for
 * a reply; otherwise the call blocks and the reply is inspected.
 *
 * Returns 0 on success, -EFAULT if the message could not be sent, or the
 * negated error_reply value when the reply carries VFIO_USER_ERROR.
 */
static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
                               hwaddr iova, ram_addr_t size,
                               IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    Error *local_err = NULL;
    VFIOUserDMAUnmap *msgp;
    int ret = 0;

    /*
     * Zero the message (as vfio_user_dma_map() does) so that any byte not
     * explicitly assigned below is not uninitialised heap data on the wire.
     */
    msgp = g_malloc0(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
    msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
    msgp->iova = iova;
    msgp->size = size;
    trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
                              container->proxy->async_ops);

    if (container->proxy->async_ops) {
        /*
         * NOTE(review): msgp is not freed on this path; presumably the proxy
         * takes ownership of async messages - confirm, including for the
         * failure case.
         */
        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
                                   0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }
        return ret;
    }

    if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
                             0, &local_err)) {
        error_report_err(local_err);
        ret = -EFAULT;
    }

    /* An error reported in the reply header overrides the send status. */
    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        ret = -msgp->hdr.error_reply;
    }

    g_free(msgp);

    return ret;
}
869fca2b7dSJohn Levon
/*
 * Send a VFIO_USER_DMA_MAP request for [iova, iova + size) to the server.
 *
 * @bcontainer: base container embedded in a VFIOUserContainer
 * @iova:       start of the range to map
 * @size:       length of the range to map
 * @vaddr:      QEMU process address backing the range
 * @readonly:   when false, VFIO_DMA_MAP_FLAG_WRITE is added to the request
 * @mrp:        memory region backing the range; its fd (if any) is passed
 *              along with the request
 *
 * Returns 0 on success, -EFAULT if the message could not be sent, or the
 * negated error_reply value when a synchronous reply carries
 * VFIO_USER_ERROR.
 */
static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                             ram_addr_t size, void *vaddr, bool readonly,
                             MemoryRegion *mrp)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    int fd = memory_region_get_fd(mrp);
    Error *local_err = NULL;
    /*
     * Must start at 0: the synchronous path only assigns ret on failure, so
     * an uninitialised ret would be returned on success.
     */
    int ret = 0;

    VFIOUserFDs *fds = NULL;
    VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
    msgp->flags = VFIO_DMA_MAP_FLAG_READ;
    msgp->offset = 0;
    msgp->iova = iova;
    msgp->size = size;

    /*
     * vaddr enters as a QEMU process address; make it either a file offset
     * for mapped areas or leave as 0.
     */
    if (fd != -1) {
        msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
    }

    if (!readonly) {
        msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
                            container->proxy->async_ops);

    /*
     * The async_ops case sends without blocking. The requests are later
     * waited for via vfio_user_wait_reqs().
     */
    if (container->proxy->async_ops) {
        /* can't use auto variable since we don't block */
        if (fd != -1) {
            fds = vfio_user_getfds(1);
            fds->send_fds = 1;
            fds->fds[0] = fd;
        }

        /*
         * NOTE(review): neither msgp nor fds is freed on this path;
         * presumably the proxy takes ownership - confirm, including for the
         * failure case.
         */
        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
                                   0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }
    } else {
        VFIOUserFDs local_fds = { 1, 0, &fd };

        fds = fd != -1 ? &local_fds : NULL;

        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
                                 0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }

        /* An error reported in the reply header overrides the send status. */
        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}
1619fca2b7dSJohn Levon
1629fca2b7dSJohn Levon static int
vfio_user_set_dirty_page_tracking(const VFIOContainerBase * bcontainer,bool start,Error ** errp)1639fca2b7dSJohn Levon vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
1649fca2b7dSJohn Levon bool start, Error **errp)
1659fca2b7dSJohn Levon {
1669fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported");
1679fca2b7dSJohn Levon return -ENOTSUP;
1689fca2b7dSJohn Levon }
1699fca2b7dSJohn Levon
vfio_user_query_dirty_bitmap(const VFIOContainerBase * bcontainer,VFIOBitmap * vbmap,hwaddr iova,hwaddr size,Error ** errp)1709fca2b7dSJohn Levon static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
1719fca2b7dSJohn Levon VFIOBitmap *vbmap, hwaddr iova,
1729fca2b7dSJohn Levon hwaddr size, Error **errp)
1739fca2b7dSJohn Levon {
1749fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported");
1759fca2b7dSJohn Levon return -ENOTSUP;
1769fca2b7dSJohn Levon }
1779fca2b7dSJohn Levon
vfio_user_setup(VFIOContainerBase * bcontainer,Error ** errp)1789fca2b7dSJohn Levon static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
1799fca2b7dSJohn Levon {
18052ce9c35SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
18152ce9c35SJohn Levon bcontainer);
18252ce9c35SJohn Levon
18352ce9c35SJohn Levon assert(container->proxy->dma_pgsizes != 0);
18452ce9c35SJohn Levon bcontainer->pgsizes = container->proxy->dma_pgsizes;
18552ce9c35SJohn Levon bcontainer->dma_max_mappings = container->proxy->max_dma;
18652ce9c35SJohn Levon
18752ce9c35SJohn Levon /* No live migration support yet. */
18852ce9c35SJohn Levon bcontainer->dirty_pages_supported = false;
18952ce9c35SJohn Levon bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
19052ce9c35SJohn Levon bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;
19152ce9c35SJohn Levon
19252ce9c35SJohn Levon return true;
1939fca2b7dSJohn Levon }
1949fca2b7dSJohn Levon
vfio_user_create_container(VFIODevice * vbasedev,Error ** errp)19552ce9c35SJohn Levon static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
19652ce9c35SJohn Levon Error **errp)
1979fca2b7dSJohn Levon {
1989fca2b7dSJohn Levon VFIOUserContainer *container;
1999fca2b7dSJohn Levon
2009fca2b7dSJohn Levon container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
20152ce9c35SJohn Levon container->proxy = vbasedev->proxy;
2029fca2b7dSJohn Levon return container;
2039fca2b7dSJohn Levon }
2049fca2b7dSJohn Levon
2059fca2b7dSJohn Levon /*
2069fca2b7dSJohn Levon * Try to mirror vfio_container_connect() as much as possible.
2079fca2b7dSJohn Levon */
2089fca2b7dSJohn Levon static VFIOUserContainer *
vfio_user_container_connect(AddressSpace * as,VFIODevice * vbasedev,Error ** errp)20952ce9c35SJohn Levon vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
21052ce9c35SJohn Levon Error **errp)
2119fca2b7dSJohn Levon {
2129fca2b7dSJohn Levon VFIOContainerBase *bcontainer;
2139fca2b7dSJohn Levon VFIOUserContainer *container;
2149fca2b7dSJohn Levon VFIOAddressSpace *space;
2159fca2b7dSJohn Levon VFIOIOMMUClass *vioc;
21652ce9c35SJohn Levon int ret;
2179fca2b7dSJohn Levon
2189fca2b7dSJohn Levon space = vfio_address_space_get(as);
2199fca2b7dSJohn Levon
22052ce9c35SJohn Levon container = vfio_user_create_container(vbasedev, errp);
2219fca2b7dSJohn Levon if (!container) {
2229fca2b7dSJohn Levon goto put_space_exit;
2239fca2b7dSJohn Levon }
2249fca2b7dSJohn Levon
2259fca2b7dSJohn Levon bcontainer = &container->bcontainer;
2269fca2b7dSJohn Levon
22752ce9c35SJohn Levon ret = ram_block_uncoordinated_discard_disable(true);
22852ce9c35SJohn Levon if (ret) {
22952ce9c35SJohn Levon error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
230*f7c5dff2SMark Cave-Ayland goto free_container_exit;
23152ce9c35SJohn Levon }
23252ce9c35SJohn Levon
2339fca2b7dSJohn Levon vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
2349fca2b7dSJohn Levon assert(vioc->setup);
2359fca2b7dSJohn Levon
2369fca2b7dSJohn Levon if (!vioc->setup(bcontainer, errp)) {
23752ce9c35SJohn Levon goto enable_discards_exit;
2389fca2b7dSJohn Levon }
2399fca2b7dSJohn Levon
2409fca2b7dSJohn Levon vfio_address_space_insert(space, bcontainer);
2419fca2b7dSJohn Levon
2429fca2b7dSJohn Levon if (!vfio_listener_register(bcontainer, errp)) {
2439fca2b7dSJohn Levon goto listener_release_exit;
2449fca2b7dSJohn Levon }
2459fca2b7dSJohn Levon
2469fca2b7dSJohn Levon bcontainer->initialized = true;
2479fca2b7dSJohn Levon
2489fca2b7dSJohn Levon return container;
2499fca2b7dSJohn Levon
2509fca2b7dSJohn Levon listener_release_exit:
2519fca2b7dSJohn Levon vfio_listener_unregister(bcontainer);
2529fca2b7dSJohn Levon if (vioc->release) {
2539fca2b7dSJohn Levon vioc->release(bcontainer);
2549fca2b7dSJohn Levon }
2559fca2b7dSJohn Levon
25652ce9c35SJohn Levon enable_discards_exit:
25752ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false);
25852ce9c35SJohn Levon
2599fca2b7dSJohn Levon free_container_exit:
2609fca2b7dSJohn Levon object_unref(container);
2619fca2b7dSJohn Levon
2629fca2b7dSJohn Levon put_space_exit:
2639fca2b7dSJohn Levon vfio_address_space_put(space);
2649fca2b7dSJohn Levon
2659fca2b7dSJohn Levon return NULL;
2669fca2b7dSJohn Levon }
2679fca2b7dSJohn Levon
/*
 * Tear down a container set up by vfio_user_container_connect(): re-enable
 * uncoordinated RAM discards, unregister the listener, call the class
 * release hook if there is one, drop the container reference and release
 * the address space.
 */
static void vfio_user_container_disconnect(VFIOUserContainer *container)
{
    VFIOContainerBase *base = &container->bcontainer;
    VFIOIOMMUClass *klass = VFIO_IOMMU_GET_CLASS(base);
    /* Read the address space before the container reference is dropped. */
    VFIOAddressSpace *as_ref = base->space;

    ram_block_uncoordinated_discard_disable(false);

    vfio_listener_unregister(base);
    if (klass->release != NULL) {
        klass->release(base);
    }

    object_unref(container);

    vfio_address_space_put(as_ref);
}
2859fca2b7dSJohn Levon
vfio_user_device_get(VFIOUserContainer * container,VFIODevice * vbasedev,Error ** errp)2869fca2b7dSJohn Levon static bool vfio_user_device_get(VFIOUserContainer *container,
2879fca2b7dSJohn Levon VFIODevice *vbasedev, Error **errp)
2889fca2b7dSJohn Levon {
2893bdb738bSJohn Levon struct vfio_device_info info = { .argsz = sizeof(info) };
2903bdb738bSJohn Levon
2913bdb738bSJohn Levon
2923bdb738bSJohn Levon if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
2933bdb738bSJohn Levon return false;
2943bdb738bSJohn Levon }
2959fca2b7dSJohn Levon
2969fca2b7dSJohn Levon vbasedev->fd = -1;
2979fca2b7dSJohn Levon
2989fca2b7dSJohn Levon vfio_device_prepare(vbasedev, &container->bcontainer, &info);
2999fca2b7dSJohn Levon
3009fca2b7dSJohn Levon return true;
3019fca2b7dSJohn Levon }
3029fca2b7dSJohn Levon
3039fca2b7dSJohn Levon /*
3049fca2b7dSJohn Levon * vfio_user_device_attach: attach a device to a new container.
3059fca2b7dSJohn Levon */
vfio_user_device_attach(const char * name,VFIODevice * vbasedev,AddressSpace * as,Error ** errp)3069fca2b7dSJohn Levon static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
3079fca2b7dSJohn Levon AddressSpace *as, Error **errp)
3089fca2b7dSJohn Levon {
3099fca2b7dSJohn Levon VFIOUserContainer *container;
3109fca2b7dSJohn Levon
31152ce9c35SJohn Levon container = vfio_user_container_connect(as, vbasedev, errp);
3129fca2b7dSJohn Levon if (container == NULL) {
3139fca2b7dSJohn Levon error_prepend(errp, "failed to connect proxy");
3149fca2b7dSJohn Levon return false;
3159fca2b7dSJohn Levon }
3169fca2b7dSJohn Levon
3179fca2b7dSJohn Levon return vfio_user_device_get(container, vbasedev, errp);
3189fca2b7dSJohn Levon }
3199fca2b7dSJohn Levon
/*
 * Detach @vbasedev: unprepare the device, then disconnect the container it
 * was attached to.
 */
static void vfio_user_device_detach(VFIODevice *vbasedev)
{
    VFIOUserContainer *ucontainer;

    /* Look the container up before unprepare, as the original code does. */
    ucontainer = container_of(vbasedev->bcontainer, VFIOUserContainer,
                              bcontainer);

    vfio_device_unprepare(vbasedev);
    vfio_user_container_disconnect(ucontainer);
}
3299fca2b7dSJohn Levon
/*
 * PCI hot reset hook: a no-op that always reports success (0).
 */
static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    /* ->needs_reset is always false for vfio-user. */
    const int success = 0;

    return success;
}
3359fca2b7dSJohn Levon
/*
 * Class initializer for TYPE_VFIO_IOMMU_USER: install the vfio-user
 * implementations of the VFIOIOMMUClass hooks.  @data is unused.
 */
static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->setup = vfio_user_setup;
    /* Each assignment is its own statement (was chained with commas). */
    vioc->listener_begin = vfio_user_listener_begin;
    vioc->listener_commit = vfio_user_listener_commit;
    vioc->dma_map = vfio_user_dma_map;
    vioc->dma_unmap = vfio_user_dma_unmap;
    vioc->attach_device = vfio_user_device_attach;
    vioc->detach_device = vfio_user_device_detach;
    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
}
3519fca2b7dSJohn Levon
3529fca2b7dSJohn Levon static const TypeInfo types[] = {
3539fca2b7dSJohn Levon {
3549fca2b7dSJohn Levon .name = TYPE_VFIO_IOMMU_USER,
3559fca2b7dSJohn Levon .parent = TYPE_VFIO_IOMMU,
3569fca2b7dSJohn Levon .instance_size = sizeof(VFIOUserContainer),
3579fca2b7dSJohn Levon .class_init = vfio_iommu_user_class_init,
3589fca2b7dSJohn Levon },
3599fca2b7dSJohn Levon };
3609fca2b7dSJohn Levon
3619fca2b7dSJohn Levon DEFINE_TYPES(types)
362