/*
 * Copyright (c) 2021-2025 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qemu/osdep.h"
#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

/*
 * Invalidate the vaddr of every DMA mapping in the container in one call.
 * The iova mappings themselves are preserved by the kernel, to be updated
 * with a new vaddr by vfio_legacy_cpr_dma_map after CPR.
 */
static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
        .iova = 0,
        .size = 0,
    };
    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
        return false;
    }
    container->cpr.vaddr_unmapped = true;
    return true;
}

/*
 * Set the new @vaddr for any mappings registered during cpr load.
 * The incoming state is cleared thereafter.
 */
static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
                                   hwaddr iova, ram_addr_t size, void *vaddr,
                                   bool readonly, MemoryRegion *mr)
{
    const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
                                                  bcontainer);
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_VADDR,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    g_assert(cpr_is_incoming());

    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
        return -errno;
    }

    return 0;
}

static void vfio_region_remap(MemoryListener *listener,
                              MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            cpr.remap_listener);
    vfio_container_region_add(&container->bcontainer, section, true);
}

/* CPR transfer needs the VFIO_UPDATE_VADDR and VFIO_UNMAP_ALL extensions. */
static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
{
    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
        error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
        return false;

    } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
        error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
        return false;

    } else {
        return true;
    }
}

/* Invalidate all vaddr's when CPR saves state, to be restored by new QEMU. */
static int vfio_container_pre_save(void *opaque)
{
    VFIOContainer *container = opaque;
    Error *local_err = NULL;

    if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
        error_report_err(local_err);
        return -1;
    }
    return 0;
}

/*
 * The incoming CPR load has re-established all mappings with their new
 * vaddr, so start the listener and restore the original dma_map handler.
 */
static int vfio_container_post_load(void *opaque, int version_id)
{
    VFIOContainer *container = opaque;
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOGroup *group;
    Error *local_err = NULL;

    if (!vfio_listener_register(bcontainer, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

        /* Restore original dma_map function */
        vioc->dma_map = container->cpr.saved_dma_map;
    }
    return 0;
}

static const VMStateDescription vfio_container_vmstate = {
    .name = "vfio-container",
    .version_id = 0,
    .minimum_version_id = 0,
    .priority = MIG_PRI_LOW, /* Must happen after devices and groups */
    .pre_save = vfio_container_pre_save,
    .post_load = vfio_container_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    }
};
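
/*
 * If CPR transfer fails after vaddr's were invalidated in pre_save, this
 * QEMU keeps running the guest, so every vaddr must be restored.
 */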
static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
                                  MigrationEvent *e, Error **errp)
{
    VFIOContainer *container =
        container_of(notifier, VFIOContainer, cpr.transfer_notifier);
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (e->type != MIG_EVENT_PRECOPY_FAILED) {
        return 0;
    }

    if (container->cpr.vaddr_unmapped) {
        /*
         * Force a call to vfio_region_remap for each mapped section by
         * temporarily registering a listener, and temporarily diverting
         * dma_map to vfio_legacy_cpr_dma_map. The latter restores vaddr.
         */

        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        vioc->dma_map = vfio_legacy_cpr_dma_map;

        container->cpr.remap_listener = (MemoryListener) {
            .name = "vfio cpr recover",
            .region_add = vfio_region_remap
        };
        memory_listener_register(&container->cpr.remap_listener,
                                 bcontainer->space->as);
        memory_listener_unregister(&container->cpr.remap_listener);
        container->cpr.vaddr_unmapped = false;
        vioc->dma_map = container->cpr.saved_dma_map;
    }
    return 0;
}

bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    Error **cpr_blocker = &container->cpr.blocker;

    migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
                                vfio_cpr_reboot_notifier,
                                MIG_MODE_CPR_REBOOT);

    if (!vfio_cpr_supported(container, cpr_blocker)) {
        return migrate_add_blocker_modes(cpr_blocker, errp,
                                         MIG_MODE_CPR_TRANSFER, -1) == 0;
    }

    vmstate_register(NULL, -1, &vfio_container_vmstate, container);

    /* During incoming CPR, divert calls to dma_map. */
    if (cpr_is_incoming()) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        container->cpr.saved_dma_map = vioc->dma_map;
        vioc->dma_map = vfio_legacy_cpr_dma_map;
    }

    migration_add_notifier_mode(&container->cpr.transfer_notifier,
                                vfio_cpr_fail_notifier,
                                MIG_MODE_CPR_TRANSFER);
    return true;
}
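
/* Undo everything that vfio_legacy_cpr_register_container set up. */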
void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
    migrate_del_blocker(&container->cpr.blocker);
    vmstate_unregister(NULL, &vfio_container_vmstate, container);
    migration_remove_notifier(&container->cpr.transfer_notifier);
}

/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
 * succeeding for others, so the latter have lost their vaddr. Call this
 * to restore vaddr for a section with a giommu.
 *
 * The giommu already exists. Find it and replay it, which calls
 * vfio_legacy_cpr_dma_map further down the stack.
 */
void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
                           MemoryRegionSection *section)
{
    VFIOGuestIOMMU *giommu = NULL;
    hwaddr as_offset = section->offset_within_address_space;
    hwaddr iommu_offset = as_offset - section->offset_within_region;

    QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
        if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
            giommu->iommu_offset == iommu_offset) {
            break;
        }
    }
    g_assert(giommu);
    memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
}

/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
 * succeeding for others, so the latter have lost their vaddr. Call this
 * to restore vaddr for a section with a RamDiscardManager.
 *
 * The ram discard listener already exists. Call its populate function
 * directly, which calls vfio_legacy_cpr_dma_map.
 */
bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl =
        vfio_find_ram_discard_listener(bcontainer, section);

    g_assert(vrdl);
    return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
}

/*
 * Reuse a device fd that was saved before CPR, if one exists; otherwise
 * fetch a new fd from the group and save it for a future CPR.
 */
int vfio_cpr_group_get_device_fd(int d, const char *name)
{
    const int id = 0;
    int fd = cpr_find_fd(name, id);

    if (fd < 0) {
        fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
        if (fd >= 0) {
            cpr_save_fd(name, id, fd);
        }
    }
    return fd;
}

/* Two fds refer to the same device if fstat reports the same st_dev. */
static bool same_device(int fd1, int fd2)
{
    struct stat st1, st2;

    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
}

bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
                              int fd)
{
    if (container->fd == fd) {
        return true;
    }
    if (!same_device(container->fd, fd)) {
        return false;
    }
    /*
     * Same device, different fd. This occurs when the container fd is
     * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
     * produces duplicates. De-dup it.
     */
    cpr_delete_fd("vfio_container_for_group", group->groupid);
    close(fd);
    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
    return true;
}