106c6a658SSteve Sistare /* 206c6a658SSteve Sistare * Copyright (c) 2024-2025 Oracle and/or its affiliates. 306c6a658SSteve Sistare * 406c6a658SSteve Sistare * SPDX-License-Identifier: GPL-2.0-or-later 506c6a658SSteve Sistare */ 606c6a658SSteve Sistare 706c6a658SSteve Sistare #include "qemu/osdep.h" 8*5c066c4bSSteve Sistare #include "qemu/error-report.h" 906c6a658SSteve Sistare #include "qapi/error.h" 1006c6a658SSteve Sistare #include "hw/vfio/vfio-cpr.h" 11f2f3e466SSteve Sistare #include "hw/vfio/vfio-device.h" 1206c6a658SSteve Sistare #include "migration/blocker.h" 1306c6a658SSteve Sistare #include "migration/cpr.h" 1406c6a658SSteve Sistare #include "migration/migration.h" 1506c6a658SSteve Sistare #include "migration/vmstate.h" 1606c6a658SSteve Sistare #include "system/iommufd.h" 1706c6a658SSteve Sistare #include "vfio-iommufd.h" 18f2f3e466SSteve Sistare #include "trace.h" 1906c6a658SSteve Sistare 20f2f3e466SSteve Sistare typedef struct CprVFIODevice { 21f2f3e466SSteve Sistare char *name; 22f2f3e466SSteve Sistare unsigned int namelen; 23f2f3e466SSteve Sistare uint32_t ioas_id; 24f2f3e466SSteve Sistare int devid; 25f2f3e466SSteve Sistare uint32_t hwpt_id; 26f2f3e466SSteve Sistare QLIST_ENTRY(CprVFIODevice) next; 27f2f3e466SSteve Sistare } CprVFIODevice; 28f2f3e466SSteve Sistare 29f2f3e466SSteve Sistare static const VMStateDescription vmstate_cpr_vfio_device = { 30f2f3e466SSteve Sistare .name = "cpr vfio device", 31f2f3e466SSteve Sistare .version_id = 1, 32f2f3e466SSteve Sistare .minimum_version_id = 1, 33f2f3e466SSteve Sistare .fields = (VMStateField[]) { 34f2f3e466SSteve Sistare VMSTATE_UINT32(namelen, CprVFIODevice), 35f2f3e466SSteve Sistare VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen), 36f2f3e466SSteve Sistare VMSTATE_INT32(devid, CprVFIODevice), 37f2f3e466SSteve Sistare VMSTATE_UINT32(ioas_id, CprVFIODevice), 38f2f3e466SSteve Sistare VMSTATE_UINT32(hwpt_id, CprVFIODevice), 39f2f3e466SSteve Sistare VMSTATE_END_OF_LIST() 40f2f3e466SSteve Sistare } 41f2f3e466SSteve Sistare }; 42f2f3e466SSteve Sistare 43f2f3e466SSteve Sistare const VMStateDescription vmstate_cpr_vfio_devices = { 44f2f3e466SSteve Sistare .name = CPR_STATE "/vfio devices", 45f2f3e466SSteve Sistare .version_id = 1, 46f2f3e466SSteve Sistare .minimum_version_id = 1, 47f2f3e466SSteve Sistare .fields = (const VMStateField[]){ 48f2f3e466SSteve Sistare VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device, 49f2f3e466SSteve Sistare CprVFIODevice, next), 50f2f3e466SSteve Sistare VMSTATE_END_OF_LIST() 51f2f3e466SSteve Sistare } 52f2f3e466SSteve Sistare }; 53f2f3e466SSteve Sistare 54f2f3e466SSteve Sistare static void vfio_cpr_save_device(VFIODevice *vbasedev) 55f2f3e466SSteve Sistare { 56f2f3e466SSteve Sistare CprVFIODevice *elem = g_new0(CprVFIODevice, 1); 57f2f3e466SSteve Sistare 58f2f3e466SSteve Sistare elem->name = g_strdup(vbasedev->name); 59f2f3e466SSteve Sistare elem->namelen = strlen(vbasedev->name) + 1; 60f2f3e466SSteve Sistare elem->ioas_id = vbasedev->cpr.ioas_id; 61f2f3e466SSteve Sistare elem->devid = vbasedev->devid; 62f2f3e466SSteve Sistare elem->hwpt_id = vbasedev->cpr.hwpt_id; 63f2f3e466SSteve Sistare QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next); 64f2f3e466SSteve Sistare } 65f2f3e466SSteve Sistare 66f2f3e466SSteve Sistare static CprVFIODevice *find_device(const char *name) 67f2f3e466SSteve Sistare { 68f2f3e466SSteve Sistare CprVFIODeviceList *head = &cpr_state.vfio_devices; 69f2f3e466SSteve Sistare CprVFIODevice *elem; 70f2f3e466SSteve Sistare 71f2f3e466SSteve Sistare QLIST_FOREACH(elem, head, next) { 72f2f3e466SSteve Sistare if (!strcmp(elem->name, name)) { 73f2f3e466SSteve Sistare return elem; 74f2f3e466SSteve Sistare } 75f2f3e466SSteve Sistare } 76f2f3e466SSteve Sistare return NULL; 77f2f3e466SSteve Sistare } 78f2f3e466SSteve Sistare 79f2f3e466SSteve Sistare static void vfio_cpr_delete_device(const char *name) 80f2f3e466SSteve Sistare { 81f2f3e466SSteve Sistare CprVFIODevice *elem = find_device(name); 82f2f3e466SSteve Sistare 83f2f3e466SSteve Sistare if (elem) { 84f2f3e466SSteve Sistare QLIST_REMOVE(elem, next); 85f2f3e466SSteve Sistare g_free(elem->name); 86f2f3e466SSteve Sistare g_free(elem); 87f2f3e466SSteve Sistare } 88f2f3e466SSteve Sistare } 89f2f3e466SSteve Sistare 90f2f3e466SSteve Sistare static bool vfio_cpr_find_device(VFIODevice *vbasedev) 91f2f3e466SSteve Sistare { 92f2f3e466SSteve Sistare CprVFIODevice *elem = find_device(vbasedev->name); 93f2f3e466SSteve Sistare 94f2f3e466SSteve Sistare if (elem) { 95f2f3e466SSteve Sistare vbasedev->cpr.ioas_id = elem->ioas_id; 96f2f3e466SSteve Sistare vbasedev->devid = elem->devid; 97f2f3e466SSteve Sistare vbasedev->cpr.hwpt_id = elem->hwpt_id; 98f2f3e466SSteve Sistare trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id); 99f2f3e466SSteve Sistare return true; 100f2f3e466SSteve Sistare } 101f2f3e466SSteve Sistare return false; 102f2f3e466SSteve Sistare } 103a6f2f9c4SSteve Sistare 10406c6a658SSteve Sistare static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp) 10506c6a658SSteve Sistare { 10606c6a658SSteve Sistare if (!iommufd_change_process_capable(be)) { 10706c6a658SSteve Sistare if (errp) { 10806c6a658SSteve Sistare error_setg(errp, "vfio iommufd backend does not support " 10906c6a658SSteve Sistare "IOMMU_IOAS_CHANGE_PROCESS"); 11006c6a658SSteve Sistare } 11106c6a658SSteve Sistare return false; 11206c6a658SSteve Sistare } 11306c6a658SSteve Sistare return true; 11406c6a658SSteve Sistare } 11506c6a658SSteve Sistare 116*5c066c4bSSteve Sistare static int iommufd_cpr_pre_save(void *opaque) 117*5c066c4bSSteve Sistare { 118*5c066c4bSSteve Sistare IOMMUFDBackend *be = opaque; 119*5c066c4bSSteve Sistare 120*5c066c4bSSteve Sistare /* 121*5c066c4bSSteve Sistare * The process has not changed yet, but proactively try the ioctl, 122*5c066c4bSSteve Sistare * and it will fail if any DMA mappings are not supported. 123*5c066c4bSSteve Sistare */ 124*5c066c4bSSteve Sistare if (!iommufd_change_process_capable(be)) { 125*5c066c4bSSteve Sistare error_report("some memory regions do not support " 126*5c066c4bSSteve Sistare "IOMMU_IOAS_CHANGE_PROCESS"); 127*5c066c4bSSteve Sistare return -1; 128*5c066c4bSSteve Sistare } 129*5c066c4bSSteve Sistare return 0; 130*5c066c4bSSteve Sistare } 131*5c066c4bSSteve Sistare 132*5c066c4bSSteve Sistare static int iommufd_cpr_post_load(void *opaque, int version_id) 133*5c066c4bSSteve Sistare { 134*5c066c4bSSteve Sistare IOMMUFDBackend *be = opaque; 135*5c066c4bSSteve Sistare Error *local_err = NULL; 136*5c066c4bSSteve Sistare 137*5c066c4bSSteve Sistare if (!iommufd_change_process(be, &local_err)) { 138*5c066c4bSSteve Sistare error_report_err(local_err); 139*5c066c4bSSteve Sistare return -1; 140*5c066c4bSSteve Sistare } 141*5c066c4bSSteve Sistare return 0; 142*5c066c4bSSteve Sistare } 143*5c066c4bSSteve Sistare 14406c6a658SSteve Sistare static const VMStateDescription iommufd_cpr_vmstate = { 14506c6a658SSteve Sistare .name = "iommufd", 14606c6a658SSteve Sistare .version_id = 0, 14706c6a658SSteve Sistare .minimum_version_id = 0, 148*5c066c4bSSteve Sistare .pre_save = iommufd_cpr_pre_save, 149*5c066c4bSSteve Sistare .post_load = iommufd_cpr_post_load, 15006c6a658SSteve Sistare .needed = cpr_incoming_needed, 15106c6a658SSteve Sistare .fields = (VMStateField[]) { 15206c6a658SSteve Sistare VMSTATE_END_OF_LIST() 15306c6a658SSteve Sistare } 15406c6a658SSteve Sistare }; 15506c6a658SSteve Sistare 15606c6a658SSteve Sistare bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp) 15706c6a658SSteve Sistare { 15806c6a658SSteve Sistare Error **cpr_blocker = &be->cpr_blocker; 15906c6a658SSteve Sistare 16006c6a658SSteve Sistare if (!vfio_cpr_supported(be, cpr_blocker)) { 16106c6a658SSteve Sistare return migrate_add_blocker_modes(cpr_blocker, errp, 16206c6a658SSteve Sistare MIG_MODE_CPR_TRANSFER, -1) == 0; 16306c6a658SSteve Sistare } 16406c6a658SSteve Sistare 16506c6a658SSteve Sistare vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be); 16606c6a658SSteve Sistare 16706c6a658SSteve Sistare return true; 16806c6a658SSteve Sistare } 16906c6a658SSteve Sistare 17006c6a658SSteve Sistare void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be) 17106c6a658SSteve Sistare { 17206c6a658SSteve Sistare vmstate_unregister(NULL, &iommufd_cpr_vmstate, be); 17306c6a658SSteve Sistare migrate_del_blocker(&be->cpr_blocker); 17406c6a658SSteve Sistare } 17506c6a658SSteve Sistare 17606c6a658SSteve Sistare bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container, 17706c6a658SSteve Sistare Error **errp) 17806c6a658SSteve Sistare { 17906c6a658SSteve Sistare VFIOContainerBase *bcontainer = &container->bcontainer; 18006c6a658SSteve Sistare 18106c6a658SSteve Sistare migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, 18206c6a658SSteve Sistare vfio_cpr_reboot_notifier, 18306c6a658SSteve Sistare MIG_MODE_CPR_REBOOT); 18406c6a658SSteve Sistare 18506c6a658SSteve Sistare vfio_cpr_add_kvm_notifier(); 18606c6a658SSteve Sistare 18706c6a658SSteve Sistare return true; 18806c6a658SSteve Sistare } 18906c6a658SSteve Sistare 19006c6a658SSteve Sistare void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) 19106c6a658SSteve Sistare { 19206c6a658SSteve Sistare VFIOContainerBase *bcontainer = &container->bcontainer; 19306c6a658SSteve Sistare 19406c6a658SSteve Sistare migration_remove_notifier(&bcontainer->cpr_reboot_notifier); 19506c6a658SSteve Sistare } 19606c6a658SSteve Sistare 19706c6a658SSteve Sistare void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) 19806c6a658SSteve Sistare { 199f2f3e466SSteve Sistare if (!cpr_is_incoming()) { 2002a3f0a59SSteve Sistare /* 2012a3f0a59SSteve Sistare * Beware fd may have already been saved by vfio_device_set_fd, 2022a3f0a59SSteve Sistare * so call resave to avoid a duplicate entry. 2032a3f0a59SSteve Sistare */ 2042a3f0a59SSteve Sistare cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); 205f2f3e466SSteve Sistare vfio_cpr_save_device(vbasedev); 206f2f3e466SSteve Sistare } 20706c6a658SSteve Sistare } 20806c6a658SSteve Sistare 20906c6a658SSteve Sistare void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) 21006c6a658SSteve Sistare { 2112a3f0a59SSteve Sistare cpr_delete_fd(vbasedev->name, 0); 212f2f3e466SSteve Sistare vfio_cpr_delete_device(vbasedev->name); 213f2f3e466SSteve Sistare } 214f2f3e466SSteve Sistare 215f2f3e466SSteve Sistare void vfio_cpr_load_device(VFIODevice *vbasedev) 216f2f3e466SSteve Sistare { 217f2f3e466SSteve Sistare if (cpr_is_incoming()) { 218f2f3e466SSteve Sistare bool ret = vfio_cpr_find_device(vbasedev); 219f2f3e466SSteve Sistare g_assert(ret); 2202a3f0a59SSteve Sistare 2212a3f0a59SSteve Sistare if (vbasedev->fd < 0) { 2222a3f0a59SSteve Sistare vbasedev->fd = cpr_find_fd(vbasedev->name, 0); 2232a3f0a59SSteve Sistare } 224f2f3e466SSteve Sistare } 22506c6a658SSteve Sistare } 226