/*
 * VFIO BASE CONTAINER
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <sys/ioctl.h>
#include <linux/vfio.h>

#include "qemu/osdep.h"
#include "system/tcg.h"
#include "system/ram_addr.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/vfio/vfio-container-base.h"
#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
#include "system/reset.h"
#include "vfio-helpers.h"

#include "trace.h"

static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);

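/*
 * Return the VFIOAddressSpace for @as, creating and registering a new
 * one if none exists yet.  Creating the first address space also
 * registers the VFIO device reset handler.
 */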
VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_register_reset(vfio_device_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

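/*
 * Release @space.  A no-op while containers remain attached; once the
 * last address space is gone, the VFIO device reset handler is
 * unregistered.
 */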
void vfio_address_space_put(VFIOAddressSpace *space)
{
    if (!QLIST_EMPTY(&space->containers)) {
        return;
    }

    QLIST_REMOVE(space, list);
    g_free(space);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_unregister_reset(vfio_device_reset_handler, NULL);
    }
}

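/* Attach @bcontainer to @space and link it back to its address space. */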
void vfio_address_space_insert(VFIOAddressSpace *space,
                               VFIOContainerBase *bcontainer)
{
    QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
    bcontainer->space = space;
}

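/*
 * Map [@iova, @iova + @size) to @vaddr.  If the region is fd-backed and
 * the IOMMU backend implements ->dma_map_file, map by (fd, offset)
 * instead of by host virtual address.
 */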
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                           hwaddr iova, ram_addr_t size,
                           void *vaddr, bool readonly, MemoryRegion *mr)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    RAMBlock *rb = mr->ram_block;
    int mfd = rb ? qemu_ram_get_fd(rb) : -1;

    if (mfd >= 0 && vioc->dma_map_file) {
        unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
        unsigned long offset = qemu_ram_get_fd_offset(rb);

        return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
                                  readonly);
    }
    g_assert(vioc->dma_map);
    return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}

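/* Unmap [@iova, @iova + @size), or everything if @unmap_all is set. */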
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
                             hwaddr iova, ram_addr_t size,
                             IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->dma_unmap);
    return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
}

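/*
 * Create a DMA window covering @section.  Backends without a
 * ->add_window callback treat this as a successful no-op.
 */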
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section,
                                       Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->add_window) {
        return true;
    }

    return vioc->add_window(bcontainer, section, errp);
}

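/* Remove the DMA window covering @section, if the backend has windows. */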
void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->del_window) {
        return;
    }

    vioc->del_window(bcontainer, section);
}

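/*
 * Start or stop container-wide dirty page tracking.  Returns 0 early
 * when the container lacks dirty tracking support or is already in the
 * requested state.
 */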
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    int ret;

    if (!bcontainer->dirty_pages_supported) {
        return 0;
    }

    g_assert(vioc->set_dirty_page_tracking);
    if (bcontainer->dirty_pages_started == start) {
        return 0;
    }

    ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
    if (!ret) {
        bcontainer->dirty_pages_started = start;
    }

    return ret;
}

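/*
 * True if device-level dirty tracking is active on every device in the
 * container (vacuously true for an empty device list).
 */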
static bool vfio_container_devices_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (!vbasedev->dirty_tracking) {
            return false;
        }
    }

    return true;
}

bool vfio_container_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
           bcontainer->dirty_pages_started;
}

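/*
 * True if every device in the container supports device-level dirty
 * page tracking and it has not been explicitly disabled.
 */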
bool vfio_container_devices_dirty_tracking_is_supported(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
            return false;
        }
        if (!vbasedev->dirty_pages_supported) {
            return false;
        }
    }

    return true;
}

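/*
 * Fetch the dirty bitmap for [@iova, @iova + @size) from one device via
 * the VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT device feature.  The
 * feature header and its payload are laid out in a uint64_t buffer so
 * that the structures are suitably aligned for the kernel UAPI.
 */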
static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
                                          hwaddr size, void *bitmap)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_dma_logging_report),
                        sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_dma_logging_report *report =
        (struct vfio_device_feature_dma_logging_report *)feature->data;

    report->iova = iova;
    report->length = size;
    report->page_size = qemu_real_host_page_size();
    report->bitmap = (uintptr_t)bitmap;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

    return vbasedev->io_ops->device_feature(vbasedev, feature);
}

static int vfio_container_iommu_query_dirty_bitmap(
    const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
    hwaddr iova, hwaddr size, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->query_dirty_bitmap);
    return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, errp);
}

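/*
 * Fill @vbmap with the DMA logging report for [@iova, @iova + @size)
 * from each device in the container, failing on the first error.
 */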
static int vfio_container_devices_query_dirty_bitmap(
    const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
    hwaddr iova, hwaddr size, Error **errp)
{
    VFIODevice *vbasedev;
    int ret;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        ret = vfio_device_dma_logging_report(vbasedev, iova, size,
                                             vbmap->bitmap);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "%s: Failed to get DMA logging report, iova: "
                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
                             vbasedev->name, iova, size);

            return ret;
        }
    }

    return 0;
}

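/*
 * Sync the dirty pages of [@iova, @iova + @size) into the memory dirty
 * log at @ram_addr, using either per-device or container-wide tracking.
 * Without any tracking support, conservatively mark the whole range
 * dirty instead.
 */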
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                      uint64_t iova, uint64_t size,
                                      ram_addr_t ram_addr, Error **errp)
{
    bool all_device_dirty_tracking =
        vfio_container_devices_dirty_tracking_is_supported(bcontainer);
    uint64_t dirty_pages;
    VFIOBitmap vbmap;
    int ret;

    if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
        cpu_physical_memory_set_dirty_range(ram_addr, size,
                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                            DIRTY_CLIENTS_NOCODE);
        return 0;
    }

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        error_setg_errno(errp, -ret,
                         "Failed to allocate dirty tracking bitmap");
        return ret;
    }

    if (all_device_dirty_tracking) {
        ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap,
                                                        iova, size, errp);
    } else {
        ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap,
                                                      iova, size, errp);
    }

    if (ret) {
        goto out;
    }

    dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
                                                         vbmap.pages);

    trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
                                            dirty_pages);
out:
    g_free(vbmap.bitmap);

    return ret;
}

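/* GCopyFunc used to deep-copy one Range element of the iova_ranges list. */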
static gpointer copy_iova_range(gconstpointer src, gpointer data)
{
    Range *source = (Range *)src;
    Range *dest = g_new(Range, 1);

    range_set_bounds(dest, range_lob(source), range_upb(source));
    return dest;
}

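/*
 * Return a deep copy of the container's usable IOVA ranges.  The caller
 * owns the returned list and its elements.
 */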
GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer)
{
    assert(bcontainer);
    return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
}

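/*
 * Unlink the container, drop the guest IOMMU notifiers and free the
 * IOVA range list when the object goes away.
 */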
static void vfio_container_instance_finalize(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
    VFIOGuestIOMMU *giommu, *tmp;

    QLIST_SAFE_REMOVE(bcontainer, next);

    QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
        memory_region_unregister_iommu_notifier(
                MEMORY_REGION(giommu->iommu_mr), &giommu->n);
        QLIST_REMOVE(giommu, giommu_next);
        g_free(giommu);
    }

    g_list_free_full(bcontainer->iova_ranges, g_free);
}

static void vfio_container_instance_init(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);

    bcontainer->error = NULL;
    bcontainer->dirty_pages_supported = false;
    bcontainer->dma_max_mappings = 0;
    bcontainer->iova_ranges = NULL;
    QLIST_INIT(&bcontainer->giommu_list);
    QLIST_INIT(&bcontainer->vrdl_list);
}

static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU,
        .parent = TYPE_OBJECT,
        .instance_init = vfio_container_instance_init,
        .instance_finalize = vfio_container_instance_finalize,
        .instance_size = sizeof(VFIOContainerBase),
        .class_size = sizeof(VFIOIOMMUClass),
        .abstract = true,
    },
};

DEFINE_TYPES(types)