/*
 * iommufd container backend
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "system/iommufd.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "trace.h"
#include "hw/vfio/vfio-device.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>

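/* Return the backend's QOM id, which also names its fd in CPR state. */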
static const char *iommufd_fd_name(IOMMUFDBackend *be)
{
    return object_get_canonical_path_component(OBJECT(be));
}

static void iommufd_backend_init(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

    be->fd = -1;
    be->users = 0;
    be->owned = true;
}

static void iommufd_backend_finalize(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

    if (be->owned) {
        close(be->fd);
        be->fd = -1;
    }
}

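/*
 * Setter for the "fd" property: accept a pre-opened iommufd file
 * descriptor (e.g. one passed in by a management layer) instead of
 * opening /dev/iommu ourselves, and mark it as externally owned.
 */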
static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
{
    ERRP_GUARD();
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
    int fd = -1;

    fd = monitor_fd_param(monitor_cur(), str, errp);
    if (fd == -1) {
        error_prepend(errp, "Could not parse remote object fd %s:", str);
        return;
    }
    be->fd = fd;
    be->owned = false;
    trace_iommu_backend_set_fd(be->fd);
}

static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);

    return !be->users;
}

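/* UserCreatable completion: sync an externally provided fd with CPR state. */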
static void iommufd_backend_complete(UserCreatable *uc, Error **errp)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
    const char *name = iommufd_fd_name(be);

    if (!be->owned) {
        /* fd came from the command line. Fetch updated value from cpr state. */
        if (cpr_is_incoming()) {
            be->fd = cpr_find_fd(name, 0);
        } else {
            cpr_save_fd(name, 0, be->fd);
        }
    }
}

static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->can_be_deleted = iommufd_backend_can_be_deleted;
    ucc->complete = iommufd_backend_complete;

    object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}

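/* Probe whether the kernel recognizes IOMMU_IOAS_CHANGE_PROCESS. */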
bool iommufd_change_process_capable(IOMMUFDBackend *be)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};

    /*
     * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl.
     * This is a no-op if the process has not changed since DMA was mapped.
     */
    return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);
}

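/*
 * Tell the kernel that the process owning this backend's DMA mappings
 * has changed (a no-op if it has not changed since DMA was mapped).
 */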
bool iommufd_change_process(IOMMUFDBackend *be, Error **errp)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};
    bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);

    if (!ret) {
        error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed",
                         be->fd);
    }
    trace_iommufd_change_process(be->fd, ret);
    return ret;
}

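/*
 * Take a reference on the backend, opening /dev/iommu on first use.
 * The fd is opened via the CPR helper so it can be saved and found
 * again across a live update.
 */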
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
    int fd;

    if (be->owned && !be->users) {
        fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp);
        if (fd < 0) {
            return false;
        }
        be->fd = fd;
    }
    if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) {
        if (be->owned) {
            close(be->fd);
            be->fd = -1;
        }
        return false;
    }
    be->users++;

    trace_iommufd_backend_connect(be->fd, be->owned, be->users);
    return true;
}

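/*
 * Drop a reference on the backend; on the last one, unregister it
 * from CPR and close the fd if we own it.
 */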
void iommufd_backend_disconnect(IOMMUFDBackend *be)
{
    if (!be->users) {
        goto out;
    }
    be->users--;
    if (!be->users) {
        vfio_iommufd_cpr_unregister_iommufd(be);
        if (be->owned) {
            cpr_delete_fd(iommufd_fd_name(be), 0);
            close(be->fd);
            be->fd = -1;
        }
    }
out:
    trace_iommufd_backend_disconnect(be->fd, be->users);
}

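/* Allocate a new I/O address space (IOAS) and return its id. */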
bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
                                Error **errp)
{
    int fd = be->fd;
    struct iommu_ioas_alloc alloc_data = {
        .size = sizeof(alloc_data),
        .flags = 0,
    };

    if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) {
        error_setg_errno(errp, errno, "Failed to allocate ioas");
        return false;
    }

    *ioas_id = alloc_data.out_ioas_id;
    trace_iommufd_backend_alloc_ioas(fd, *ioas_id);

    return true;
}

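/* Destroy an iommufd object (IOAS, HWPT, ...) by id via IOMMU_DESTROY. */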
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
{
    int ret, fd = be->fd;
    struct iommu_destroy des = {
        .size = sizeof(des),
        .id = id,
    };

    ret = ioctl(fd, IOMMU_DESTROY, &des);
    trace_iommufd_backend_free_id(fd, id, ret);
    if (ret) {
        error_report("Failed to free id: %u %m", id);
    }
}

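/*
 * Map [iova, iova + size) in the IOAS to the host virtual address
 * vaddr, at a fixed IOVA; returns 0 or a negative errno value.
 */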
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly)
{
    int ret, fd = be->fd;
    struct iommu_ioas_map map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_FIXED_IOVA,
        .ioas_id = ioas_id,
        .__reserved = 0,
        .user_va = (uintptr_t)vaddr,
        .iova = iova,
        .length = size,
    };

    if (!readonly) {
        map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
    }

    ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
    trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
                                  vaddr, readonly, ret);
    if (ret) {
        ret = -errno;

        /* TODO: mapping hardware PCI BAR regions is not supported yet. */
        if (errno == EFAULT) {
            warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
        }
    }
    return ret;
}

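/*
 * Like iommufd_backend_map_dma(), but map the range backed by file
 * descriptor mfd starting at offset start. Skipped on the incoming
 * side of CPR, since the mappings persist across the live update.
 */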
int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                                 hwaddr iova, ram_addr_t size,
                                 int mfd, unsigned long start, bool readonly)
{
    int ret, fd = be->fd;
    struct iommu_ioas_map_file map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_FIXED_IOVA,
        .ioas_id = ioas_id,
        .fd = mfd,
        .start = start,
        .iova = iova,
        .length = size,
    };

    if (cpr_is_incoming()) {
        return 0;
    }

    if (!readonly) {
        map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
    }

    ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map);
    trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start,
                                       readonly, ret);
    if (ret) {
        ret = -errno;

        /* TODO: mapping hardware PCI BAR regions is not supported yet. */
        if (errno == EFAULT) {
            warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?");
        }
    }
    return ret;
}

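/* Unmap [iova, iova + size) from the IOAS; returns 0 or -errno. */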
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                              hwaddr iova, ram_addr_t size)
{
    int ret, fd = be->fd;
    struct iommu_ioas_unmap unmap = {
        .size = sizeof(unmap),
        .ioas_id = ioas_id,
        .iova = iova,
        .length = size,
    };

    if (cpr_is_incoming()) {
        return 0;
    }

    ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
    /*
     * IOMMUFD treats each mapping as an object: unmapping a
     * nonexistent mapping is like destroying a nonexistent object
     * and returns ENOENT, unlike the legacy backend, which allows
     * it. A vIOMMU may trigger many redundant unmaps, so to avoid
     * flooding the log, treat them as success for IOMMUFD just like
     * the legacy backend does.
     */
    if (ret && errno == ENOENT) {
        trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
        ret = 0;
    } else {
        trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
    }

    if (ret) {
        ret = -errno;
    }
    return ret;
}

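/*
 * Allocate a hardware page table (HWPT) for device dev_id on top of
 * parent object pt_id (an IOAS or another HWPT), optionally passing
 * vendor-specific data; returns the new id in *out_hwpt.
 */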
bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
                                uint32_t pt_id, uint32_t flags,
                                uint32_t data_type, uint32_t data_len,
                                void *data_ptr, uint32_t *out_hwpt,
                                Error **errp)
{
    int ret, fd = be->fd;
    struct iommu_hwpt_alloc alloc_hwpt = {
        .size = sizeof(struct iommu_hwpt_alloc),
        .flags = flags,
        .dev_id = dev_id,
        .pt_id = pt_id,
        .data_type = data_type,
        .data_len = data_len,
        .data_uptr = (uintptr_t)data_ptr,
    };

    ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
    trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
                                     data_len, (uintptr_t)data_ptr,
                                     alloc_hwpt.out_hwpt_id, ret);
    if (ret) {
        error_setg_errno(errp, errno, "Failed to allocate hwpt");
        return false;
    }

    *out_hwpt = alloc_hwpt.out_hwpt_id;
    return true;
}

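/* Start or stop dirty page tracking on a HWPT. */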
bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
                                        uint32_t hwpt_id, bool start,
                                        Error **errp)
{
    int ret;
    struct iommu_hwpt_set_dirty_tracking set_dirty = {
        .size = sizeof(set_dirty),
        .hwpt_id = hwpt_id,
        .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
    };

    ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
    trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
    if (ret) {
        error_setg_errno(errp, errno,
                         "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
                         hwpt_id);
        return false;
    }

    return true;
}

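/*
 * Read back the dirty bitmap for [iova, iova + size) at page_size
 * granularity into the caller-supplied buffer, one bit per page.
 */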
bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
                                      uint32_t hwpt_id,
                                      uint64_t iova, ram_addr_t size,
                                      uint64_t page_size, uint64_t *data,
                                      Error **errp)
{
    int ret;
    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
        .size = sizeof(get_dirty_bitmap),
        .hwpt_id = hwpt_id,
        .iova = iova,
        .length = size,
        .page_size = page_size,
        .data = (uintptr_t)data,
    };

    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
                                           page_size, ret ? errno : 0);
    if (ret) {
        error_setg_errno(errp, errno,
                         "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx
                         " size: 0x"RAM_ADDR_FMT") failed", iova, size);
        return false;
    }

    return true;
}

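/*
 * Query hardware IOMMU information for a device: the data type, up to
 * len bytes of vendor data, and the capability bits (*caps).
 */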
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
                                     uint32_t *type, void *data, uint32_t len,
                                     uint64_t *caps, Error **errp)
{
    struct iommu_hw_info info = {
        .size = sizeof(info),
        .dev_id = devid,
        .data_len = len,
        .data_uptr = (uintptr_t)data,
    };

    if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) {
        error_setg_errno(errp, errno, "Failed to get hardware info");
        return false;
    }

    g_assert(type);
    *type = info.out_data_type;
    g_assert(caps);
    *caps = info.out_capabilities;

    return true;
}

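/*
 * Invalidate cached translations on a HWPT. On return, *entry_num
 * holds the number of entries the kernel actually processed.
 */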
bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
                                      uint32_t data_type, uint32_t entry_len,
                                      uint32_t *entry_num, void *data,
                                      Error **errp)
{
    int ret, fd = be->fd;
    uint32_t total_entries = *entry_num;
    struct iommu_hwpt_invalidate cache = {
        .size = sizeof(cache),
        .hwpt_id = id,
        .data_type = data_type,
        .entry_len = entry_len,
        .entry_num = total_entries,
        .data_uptr = (uintptr_t)data,
    };

    ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
    trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
                                           total_entries, cache.entry_num,
                                           (uintptr_t)data, ret ? errno : 0);
    *entry_num = cache.entry_num;

    if (ret) {
        error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
                         " total %d entries, processed %d entries",
                         total_entries, cache.entry_num);
    } else if (total_entries != cache.entry_num) {
        error_setg(errp, "IOMMU_HWPT_INVALIDATE succeeded but with unprocessed"
                         " entries: total %d entries, processed %d entries."
                         " Kernel BUG?!", total_entries, cache.entry_num);
        return false;
    }

    return !ret;
}

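/* Dispatch to the concrete device class's attach_hwpt hook. */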
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           uint32_t hwpt_id, Error **errp)
{
    HostIOMMUDeviceIOMMUFDClass *idevc =
        HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);

    g_assert(idevc->attach_hwpt);
    return idevc->attach_hwpt(idev, hwpt_id, errp);
}

bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           Error **errp)
{
    HostIOMMUDeviceIOMMUFDClass *idevc =
        HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);

    g_assert(idevc->detach_hwpt);
    return idevc->detach_hwpt(idev, errp);
}

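/* HostIOMMUDeviceClass::get_cap implementation for iommufd-backed devices. */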
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
    HostIOMMUDeviceCaps *caps = &hiod->caps;

    switch (cap) {
    case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
        return caps->type;
    case HOST_IOMMU_DEVICE_CAP_AW_BITS:
        return vfio_device_get_aw_bits(hiod->agent);
    default:
        error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
        return -EINVAL;
    }
}

static void hiod_iommufd_class_init(ObjectClass *oc, const void *data)
{
    HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);

    hioc->get_cap = hiod_iommufd_get_cap;
}

static const TypeInfo types[] = {
    {
        .name = TYPE_IOMMUFD_BACKEND,
        .parent = TYPE_OBJECT,
        .instance_size = sizeof(IOMMUFDBackend),
        .instance_init = iommufd_backend_init,
        .instance_finalize = iommufd_backend_finalize,
        .class_size = sizeof(IOMMUFDBackendClass),
        .class_init = iommufd_backend_class_init,
        .interfaces = (const InterfaceInfo[]) {
            { TYPE_USER_CREATABLE },
            { }
        }
    }, {
        .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
        .parent = TYPE_HOST_IOMMU_DEVICE,
        .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
        .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
        .class_init = hiod_iommufd_class_init,
        .abstract = true,
    }
};

DEFINE_TYPES(types)