/*
 * VFIO BASE CONTAINER
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"

#include <sys/ioctl.h>
#include <linux/vfio.h>

#include "system/tcg.h"
#include "system/ram_addr.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/vfio/vfio-container-base.h"
#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
#include "system/reset.h"
#include "vfio-helpers.h"

#include "trace.h"

static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);

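/*
 * Return the VFIOAddressSpace wrapping the given AddressSpace, creating
 * it on first use.  Registering the first address space also installs
 * the global VFIO device reset handler.
 */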
VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_register_reset(vfio_device_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

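/*
 * Release a VFIOAddressSpace once it holds no more containers.  When the
 * last tracked address space goes away, the reset handler is unregistered.
 */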
void vfio_address_space_put(VFIOAddressSpace *space)
{
    if (!QLIST_EMPTY(&space->containers)) {
        return;
    }

    QLIST_REMOVE(space, list);
    g_free(space);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_unregister_reset(vfio_device_reset_handler, NULL);
    }
}

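/* Link a container into an address space and back-reference the space. */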
void vfio_address_space_insert(VFIOAddressSpace *space,
                               VFIOContainerBase *bcontainer)
{
    QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
    bcontainer->space = space;
}

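/*
 * Map [iova, iova + size) to vaddr.  When the memory region is backed by
 * a file descriptor and the IOMMU backend implements ->dma_map_file(),
 * prefer the fd-based path so the backend can map by fd + offset;
 * otherwise fall back to the mandatory ->dma_map() callback.
 */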
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                           hwaddr iova, ram_addr_t size,
                           void *vaddr, bool readonly, MemoryRegion *mr)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    RAMBlock *rb = mr->ram_block;
    int mfd = rb ? qemu_ram_get_fd(rb) : -1;

    if (mfd >= 0 && vioc->dma_map_file) {
        unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
        unsigned long offset = qemu_ram_get_fd_offset(rb);

        return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
                                  readonly);
    }
    g_assert(vioc->dma_map);
    return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}

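/* Unmap [iova, iova + size), or everything at once if unmap_all is set. */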
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
                             hwaddr iova, ram_addr_t size,
                             IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->dma_unmap);
    return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
}

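/*
 * DMA window setup is only meaningful for backends that implement it
 * (e.g. spapr); a missing ->add_window() is treated as trivial success.
 */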
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section,
                                       Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->add_window) {
        return true;
    }

    return vioc->add_window(bcontainer, section, errp);
}

void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->del_window) {
        return;
    }

    vioc->del_window(bcontainer, section);
}

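/*
 * Toggle container-wide dirty page tracking.  No-op when the container
 * does not support it or when tracking is already in the requested state.
 */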
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    int ret;

    if (!bcontainer->dirty_pages_supported) {
        return 0;
    }

    g_assert(vioc->set_dirty_page_tracking);
    if (bcontainer->dirty_pages_started == start) {
        return 0;
    }

    ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
    if (!ret) {
        bcontainer->dirty_pages_started = start;
    }

    return ret;
}

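/*
 * Device-level dirty tracking counts as started only when every device
 * in the container has it enabled.
 */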
static bool vfio_container_devices_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (!vbasedev->dirty_tracking) {
            return false;
        }
    }

    return true;
}

bool vfio_container_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
           bcontainer->dirty_pages_started;
}

bool vfio_container_devices_dirty_tracking_is_supported(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
            return false;
        }
        if (!vbasedev->dirty_pages_supported) {
            return false;
        }
    }

    return true;
}

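/*
 * Fetch the dirty bitmap for [iova, iova + size) from a single device via
 * the VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT feature.  The uint64_t buffer
 * provides correctly aligned storage for the feature header plus its
 * trailing report payload.
 */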
static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
                                          hwaddr size, void *bitmap)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_dma_logging_report),
                        sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_dma_logging_report *report =
        (struct vfio_device_feature_dma_logging_report *)feature->data;

    report->iova = iova;
    report->length = size;
    report->page_size = qemu_real_host_page_size();
    report->bitmap = (uintptr_t)bitmap;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

    return vbasedev->io_ops->device_feature(vbasedev, feature);
}

static int vfio_container_iommu_query_dirty_bitmap(
    const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova,
    hwaddr size, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->query_dirty_bitmap);
    return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, errp);
}

static int vfio_container_devices_query_dirty_bitmap(
    const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova,
    hwaddr size, Error **errp)
{
    VFIODevice *vbasedev;
    int ret;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        ret = vfio_device_dma_logging_report(vbasedev, iova, size,
                                             vbmap->bitmap);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "%s: Failed to get DMA logging report, iova: "
                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
                             vbasedev->name, iova, size);

            return ret;
        }
    }

    return 0;
}

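/*
 * Query the dirty bitmap for [iova, iova + size) and fold it into the
 * global dirty memory bitmap at ram_addr.  Without any form of dirty
 * tracking, conservatively mark the whole range dirty.  Device-level
 * tracking is preferred when every device supports it; otherwise the
 * container-wide IOMMU tracking is used.
 */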
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                      uint64_t iova, uint64_t size,
                                      ram_addr_t ram_addr, Error **errp)
{
    bool all_device_dirty_tracking =
        vfio_container_devices_dirty_tracking_is_supported(bcontainer);
    uint64_t dirty_pages;
    VFIOBitmap vbmap;
    int ret;

    if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
        cpu_physical_memory_set_dirty_range(ram_addr, size,
                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                            DIRTY_CLIENTS_NOCODE);
        return 0;
    }

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        error_setg_errno(errp, -ret,
                         "Failed to allocate dirty tracking bitmap");
        return ret;
    }

    if (all_device_dirty_tracking) {
        ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap,
                                                        iova, size, errp);
    } else {
        ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap,
                                                      iova, size, errp);
    }

    if (ret) {
        goto out;
    }

    dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
                                                         vbmap.pages);

    trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
                                            dirty_pages);
out:
    g_free(vbmap.bitmap);

    return ret;
}

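/*
 * GCopyFunc used by vfio_container_get_iova_ranges() to deep-copy the
 * list of Range elements.
 */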
static gpointer copy_iova_range(gconstpointer src, gpointer data)
{
    Range *source = (Range *)src;
    Range *dest = g_new(Range, 1);

    range_set_bounds(dest, range_lob(source), range_upb(source));
    return dest;
}

GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer)
{
    assert(bcontainer);
    return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
}

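/*
 * Tear down per-container state: unlink from the address space, drop all
 * guest IOMMU notifiers, and free the recorded IOVA ranges.
 */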
static void vfio_container_instance_finalize(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
    VFIOGuestIOMMU *giommu, *tmp;

    QLIST_SAFE_REMOVE(bcontainer, next);

    QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
        memory_region_unregister_iommu_notifier(
                MEMORY_REGION(giommu->iommu_mr), &giommu->n);
        QLIST_REMOVE(giommu, giommu_next);
        g_free(giommu);
    }

    g_list_free_full(bcontainer->iova_ranges, g_free);
}

static void vfio_container_instance_init(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);

    bcontainer->error = NULL;
    bcontainer->dirty_pages_supported = false;
    bcontainer->dma_max_mappings = 0;
    bcontainer->iova_ranges = NULL;
    QLIST_INIT(&bcontainer->giommu_list);
    QLIST_INIT(&bcontainer->vrdl_list);
}

static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU,
        .parent = TYPE_OBJECT,
        .instance_init = vfio_container_instance_init,
        .instance_finalize = vfio_container_instance_finalize,
        .instance_size = sizeof(VFIOContainerBase),
        .class_size = sizeof(VFIOIOMMUClass),
        .abstract = true,
    },
};

DEFINE_TYPES(types)