/*
 * Copyright (c) 2021-2025 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qemu/osdep.h"
#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

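/*
 * Invalidate the vaddr of every DMA mapping in one call.
 * VFIO_DMA_UNMAP_FLAG_VADDR tells the kernel to drop only the userspace
 * virtual address while preserving the IOVA mappings themselves, and
 * VFIO_DMA_UNMAP_FLAG_ALL applies this to all mappings, which is why
 * iova and size are 0.
 */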
static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
        .iova = 0,
        .size = 0,
    };
    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
        return false;
    }
    container->cpr.vaddr_unmapped = true;
    return true;
}

/*
 * Set the new @vaddr for any mappings registered during cpr load.
 * The incoming state is cleared thereafter.
 */
static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
                                   hwaddr iova, ram_addr_t size, void *vaddr,
                                   bool readonly, MemoryRegion *mr)
{
    const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
                                                  bcontainer);
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_VADDR,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    g_assert(cpr_is_incoming());

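    /*
     * MAP_DMA with VFIO_DMA_MAP_FLAG_VADDR updates the vaddr of a mapping
     * that was established before CPR and preserved by the kernel; no new
     * mapping is created, which is why @readonly is not used here.
     */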
    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
        return -errno;
    }

    return 0;
}

static void vfio_region_remap(MemoryListener *listener,
                              MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            cpr.remap_listener);
    vfio_container_region_add(&container->bcontainer, section, true);
}

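/*
 * CPR needs VFIO_UPDATE_VADDR to supply new vaddrs after exec, and
 * VFIO_UNMAP_ALL to invalidate all vaddrs in a single ioctl.
 */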
static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
{
    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
        error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
        return false;

    } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
        error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
        return false;

    } else {
        return true;
    }
}

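/*
 * Before exec, invalidate the vaddr of every mapping.  The kernel keeps
 * the IOVA mappings alive across the exec; the new QEMU supplies new
 * vaddrs via vfio_legacy_cpr_dma_map during post load.
 */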
static int vfio_container_pre_save(void *opaque)
{
    VFIOContainer *container = opaque;
    Error *local_err = NULL;

    if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
        error_report_err(local_err);
        return -1;
    }
    return 0;
}

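/*
 * Runs after devices and groups are loaded (MIG_PRI_LOW).  Registering
 * the listener replays every section of the address space, which reaches
 * the diverted vfio_legacy_cpr_dma_map and sets the new vaddr for each
 * existing mapping.  The original dma_map can then be restored.
 */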
static int vfio_container_post_load(void *opaque, int version_id)
{
    VFIOContainer *container = opaque;
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOGroup *group;
    Error *local_err = NULL;

    if (!vfio_listener_register(bcontainer, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

        /* Restore original dma_map function */
        vioc->dma_map = container->cpr.saved_dma_map;
    }
    return 0;
}

static const VMStateDescription vfio_container_vmstate = {
    .name = "vfio-container",
    .version_id = 0,
    .minimum_version_id = 0,
    .priority = MIG_PRI_LOW,  /* Must happen after devices and groups */
    .pre_save = vfio_container_pre_save,
    .post_load = vfio_container_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    }
};

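/*
 * If CPR transfer fails after vaddrs were invalidated in pre_save, the
 * old QEMU resumes execution, so it must restore the vaddrs of its own
 * mappings.
 */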
static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
                                  MigrationEvent *e, Error **errp)
{
    VFIOContainer *container =
        container_of(notifier, VFIOContainer, cpr.transfer_notifier);
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (e->type != MIG_EVENT_PRECOPY_FAILED) {
        return 0;
    }

    if (container->cpr.vaddr_unmapped) {
        /*
         * Force a call to vfio_region_remap for each mapped section by
         * temporarily registering a listener, and temporarily diverting
         * dma_map to vfio_legacy_cpr_dma_map.  The latter restores vaddr.
         */

        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        vioc->dma_map = vfio_legacy_cpr_dma_map;

        container->cpr.remap_listener = (MemoryListener) {
            .name = "vfio cpr recover",
            .region_add = vfio_region_remap
        };
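        /*
         * Registering the listener replays region_add for every section
         * already present in the address space, so it can be unregistered
         * immediately afterwards.
         */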
        memory_listener_register(&container->cpr.remap_listener,
                                 bcontainer->space->as);
        memory_listener_unregister(&container->cpr.remap_listener);
        container->cpr.vaddr_unmapped = false;
        vioc->dma_map = container->cpr.saved_dma_map;
    }
    return 0;
}

bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    Error **cpr_blocker = &container->cpr.blocker;

    migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
                                vfio_cpr_reboot_notifier,
                                MIG_MODE_CPR_REBOOT);

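    /*
     * If the kernel lacks the needed extensions, block cpr-transfer for
     * this VM but leave the container otherwise usable.
     */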
    if (!vfio_cpr_supported(container, cpr_blocker)) {
        return migrate_add_blocker_modes(cpr_blocker, errp,
                                         MIG_MODE_CPR_TRANSFER, -1) == 0;
    }

    vmstate_register(NULL, -1, &vfio_container_vmstate, container);

    /* During incoming CPR, divert calls to dma_map. */
    if (cpr_is_incoming()) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        container->cpr.saved_dma_map = vioc->dma_map;
        vioc->dma_map = vfio_legacy_cpr_dma_map;
    }

    migration_add_notifier_mode(&container->cpr.transfer_notifier,
                                vfio_cpr_fail_notifier,
                                MIG_MODE_CPR_TRANSFER);
    return true;
}

void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
    migrate_del_blocker(&container->cpr.blocker);
    vmstate_unregister(NULL, &vfio_container_vmstate, container);
    migration_remove_notifier(&container->cpr.transfer_notifier);
}

/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail for some mappings after
 * succeeding for others, so the latter have lost their vaddr.  Call this
 * to restore vaddr for a section with a giommu.
 *
 * The giommu already exists.  Find it and replay it, which calls
 * vfio_legacy_cpr_dma_map further down the stack.
 */
void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
                           MemoryRegionSection *section)
{
    VFIOGuestIOMMU *giommu = NULL;
    hwaddr as_offset = section->offset_within_address_space;
    hwaddr iommu_offset = as_offset - section->offset_within_region;

    QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
        if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
            giommu->iommu_offset == iommu_offset) {
            break;
        }
    }
    g_assert(giommu);
    memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
}

/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail for some mappings after
 * succeeding for others, so the latter have lost their vaddr.  Call this
 * to restore vaddr for a section with a RamDiscardManager.
 *
 * The ram discard listener already exists.  Call its populate function
 * directly, which calls vfio_legacy_cpr_dma_map.
 */
bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl =
        vfio_find_ram_discard_listener(bcontainer, section);

    g_assert(vrdl);
    return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
}

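/*
 * Reuse a device fd preserved in CPR state by the old QEMU, if one was
 * saved; otherwise fetch a fresh fd from the group and save it for a
 * possible future CPR.
 */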
int vfio_cpr_group_get_device_fd(int d, const char *name)
{
    const int id = 0;
    int fd = cpr_find_fd(name, id);

    if (fd < 0) {
        fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
        if (fd >= 0) {
            cpr_save_fd(name, id, fd);
        }
    }
    return fd;
}

static bool same_device(int fd1, int fd2)
{
    struct stat st1, st2;

    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
}

bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
                              int fd)
{
    if (container->fd == fd) {
        return true;
    }
    if (!same_device(container->fd, fd)) {
        return false;
    }
    /*
     * Same device, different fd.  This occurs when the container fd is
     * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
     * produces duplicates.  De-dup it.
     */
    cpr_delete_fd("vfio_container_for_group", group->groupid);
    close(fd);
    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
    return true;
}