/*
 * iommufd container backend
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "system/iommufd.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "trace.h"
#include "hw/vfio/vfio-device.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static const char *iommufd_fd_name(IOMMUFDBackend *be)
{
    return object_get_canonical_path_component(OBJECT(be));
}

static void iommufd_backend_init(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

    be->fd = -1;
    be->users = 0;
    be->owned = true;
}

static void iommufd_backend_finalize(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

    if (be->owned) {
        close(be->fd);
        be->fd = -1;
    }
}

static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
{
    ERRP_GUARD();
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
    int fd = -1;

    fd = monitor_fd_param(monitor_cur(), str, errp);
    if (fd == -1) {
        error_prepend(errp, "Could not parse remote object fd %s:", str);
        return;
    }
    be->fd = fd;
    be->owned = false;
    trace_iommu_backend_set_fd(be->fd);
}

static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);

    return !be->users;
}

static void iommufd_backend_complete(UserCreatable *uc, Error **errp)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
    const char *name = iommufd_fd_name(be);

    if (!be->owned) {
        /* fd came from the command line. Fetch updated value from cpr state. */
        if (cpr_is_incoming()) {
            be->fd = cpr_find_fd(name, 0);
        } else {
            cpr_save_fd(name, 0, be->fd);
        }
    }
}

static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->can_be_deleted = iommufd_backend_can_be_deleted;
    ucc->complete = iommufd_backend_complete;

    object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}

bool iommufd_change_process_capable(IOMMUFDBackend *be)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};

    /*
     * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl.
     * This is a no-op if the process has not changed since DMA was mapped.
     */
    return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);
}

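/*
 * Re-associate this iommufd's mappings with the current process.  This is
 * a no-op if the process has not changed since DMA was mapped; failures
 * are reported through @errp.
 */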
bool iommufd_change_process(IOMMUFDBackend *be, Error **errp)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};
    bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);

    if (!ret) {
        error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed",
                         be->fd);
    }
    trace_iommufd_change_process(be->fd, ret);
    return ret;
}

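/*
 * Acquire a reference on the backend.  The first user opens /dev/iommu
 * (or, with CPR, re-uses the preserved fd) when the backend owns the fd,
 * and registers the backend for CPR.
 */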
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
    int fd;

    if (be->owned && !be->users) {
        fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp);
        if (fd < 0) {
            return false;
        }
        be->fd = fd;
    }
    if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) {
        if (be->owned) {
            close(be->fd);
            be->fd = -1;
        }
        return false;
    }
    be->users++;

    trace_iommufd_backend_connect(be->fd, be->owned, be->users);
    return true;
}

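/*
 * Drop a reference on the backend.  The last user unregisters it from CPR
 * and closes the fd if the backend owns it.
 */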
void iommufd_backend_disconnect(IOMMUFDBackend *be)
{
    if (!be->users) {
        goto out;
    }
    be->users--;
    if (!be->users) {
        vfio_iommufd_cpr_unregister_iommufd(be);
        if (be->owned) {
            cpr_delete_fd(iommufd_fd_name(be), 0);
            close(be->fd);
            be->fd = -1;
        }
    }
out:
    trace_iommufd_backend_disconnect(be->fd, be->users);
}

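/* Allocate a new I/O address space and return its ID in @ioas_id. */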
bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
                                Error **errp)
{
    int fd = be->fd;
    struct iommu_ioas_alloc alloc_data = {
        .size = sizeof(alloc_data),
        .flags = 0,
    };

    if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) {
        error_setg_errno(errp, errno, "Failed to allocate ioas");
        return false;
    }

    *ioas_id = alloc_data.out_ioas_id;
    trace_iommufd_backend_alloc_ioas(fd, *ioas_id);

    return true;
}

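/*
 * Destroy an iommufd object (IOAS, HWPT, ...) by ID.  Failure is only
 * logged, as callers have no way to recover.
 */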
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
{
    int ret, fd = be->fd;
    struct iommu_destroy des = {
        .size = sizeof(des),
        .id = id,
    };

    ret = ioctl(fd, IOMMU_DESTROY, &des);
    trace_iommufd_backend_free_id(fd, id, ret);
    if (ret) {
        error_report("Failed to free id: %u %m", id);
    }
}

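/*
 * Map @size bytes of host memory at @vaddr to @iova in the given IOAS.
 * Returns 0 on success, a negative errno value on failure.
 */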
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly)
{
    int ret, fd = be->fd;
    struct iommu_ioas_map map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_FIXED_IOVA,
        .ioas_id = ioas_id,
        .__reserved = 0,
        .user_va = (uintptr_t)vaddr,
        .iova = iova,
        .length = size,
    };

    if (!readonly) {
        map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
    }

    ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
    trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
                                  vaddr, readonly, ret);
    if (ret) {
        ret = -errno;

        /* TODO: mapping hardware PCI BAR regions is not supported for now. */
        if (errno == EFAULT) {
            warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
        }
    }
    return ret;
}

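/*
 * Like iommufd_backend_map_dma(), but the source is @size bytes of file
 * @mfd starting at offset @start.  A no-op on the CPR incoming path, where
 * the mappings already exist.
 */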
int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                                 hwaddr iova, ram_addr_t size,
                                 int mfd, unsigned long start, bool readonly)
{
    int ret, fd = be->fd;
    struct iommu_ioas_map_file map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_FIXED_IOVA,
        .ioas_id = ioas_id,
        .fd = mfd,
        .start = start,
        .iova = iova,
        .length = size,
    };

    if (cpr_is_incoming()) {
        return 0;
    }

    if (!readonly) {
        map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
    }

    ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map);
    trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start,
                                       readonly, ret);
    if (ret) {
        ret = -errno;

        /* TODO: mapping hardware PCI BAR regions is not supported for now. */
        if (errno == EFAULT) {
            warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?");
        }
    }
    return ret;
}

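/*
 * Unmap @size bytes at @iova from the given IOAS.  Returns 0 on success,
 * a negative errno value on failure.  Also a no-op on the CPR incoming
 * path.
 */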
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                              hwaddr iova, ram_addr_t size)
{
    int ret, fd = be->fd;
    struct iommu_ioas_unmap unmap = {
        .size = sizeof(unmap),
        .ioas_id = ioas_id,
        .iova = iova,
        .length = size,
    };

    if (cpr_is_incoming()) {
        return 0;
    }

    ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
    /*
     * IOMMUFD treats each mapping as an object, so unmapping a nonexistent
     * mapping is treated as deleting a nonexistent object and returns
     * ENOENT.  This differs from the legacy backend, which allows it.
     * A vIOMMU may trigger a lot of redundant unmappings; to avoid
     * flooding the log, treat them as success for IOMMUFD, just like the
     * legacy backend.
     */
    if (ret && errno == ENOENT) {
        trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
        ret = 0;
    } else {
        trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
    }

    if (ret) {
        ret = -errno;
    }
    return ret;
}

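/*
 * Allocate a hardware page table for device @dev_id on top of @pt_id,
 * optionally passing driver-specific data, and return its ID in @out_hwpt.
 */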
bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
                                uint32_t pt_id, uint32_t flags,
                                uint32_t data_type, uint32_t data_len,
                                void *data_ptr, uint32_t *out_hwpt,
                                Error **errp)
{
    int ret, fd = be->fd;
    struct iommu_hwpt_alloc alloc_hwpt = {
        .size = sizeof(struct iommu_hwpt_alloc),
        .flags = flags,
        .dev_id = dev_id,
        .pt_id = pt_id,
        .data_type = data_type,
        .data_len = data_len,
        .data_uptr = (uintptr_t)data_ptr,
    };

    ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
    trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
                                     data_len, (uintptr_t)data_ptr,
                                     alloc_hwpt.out_hwpt_id, ret);
    if (ret) {
        error_setg_errno(errp, errno, "Failed to allocate hwpt");
        return false;
    }

    *out_hwpt = alloc_hwpt.out_hwpt_id;
    return true;
}

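/* Start or stop dirty page tracking on the given hardware page table. */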
bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
                                        uint32_t hwpt_id, bool start,
                                        Error **errp)
{
    int ret;
    struct iommu_hwpt_set_dirty_tracking set_dirty = {
        .size = sizeof(set_dirty),
        .hwpt_id = hwpt_id,
        .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
    };

    ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
    trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
    if (ret) {
        error_setg_errno(errp, errno,
                         "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
                         hwpt_id);
        return false;
    }

    return true;
}

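/*
 * Fetch the dirty bitmap covering [@iova, @iova + @size) at @page_size
 * granularity into the caller-provided buffer @data.
 */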
bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
                                      uint32_t hwpt_id,
                                      uint64_t iova, ram_addr_t size,
                                      uint64_t page_size, uint64_t *data,
                                      Error **errp)
{
    int ret;
    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
        .size = sizeof(get_dirty_bitmap),
        .hwpt_id = hwpt_id,
        .iova = iova,
        .length = size,
        .page_size = page_size,
        .data = (uintptr_t)data,
    };

    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
                                           page_size, ret ? errno : 0);
    if (ret) {
        error_setg_errno(errp, errno,
                         "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx
                         " size: 0x"RAM_ADDR_FMT") failed", iova, size);
        return false;
    }

    return true;
}

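/*
 * Query hardware information for device @devid: the type and contents of
 * the driver-specific data are returned through @type and @data, and the
 * capability flags through @caps.
 */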
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
                                     uint32_t *type, void *data, uint32_t len,
                                     uint64_t *caps, Error **errp)
{
    struct iommu_hw_info info = {
        .size = sizeof(info),
        .dev_id = devid,
        .data_len = len,
        .data_uptr = (uintptr_t)data,
    };

    if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) {
        error_setg_errno(errp, errno, "Failed to get hardware info");
        return false;
    }

    g_assert(type);
    *type = info.out_data_type;
    g_assert(caps);
    *caps = info.out_capabilities;

    return true;
}

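/*
 * Invalidate cached translations for object @id.  @entry_num holds the
 * number of invalidation entries passed in and is updated with the number
 * of entries the kernel actually processed.
 */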
bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
                                      uint32_t data_type, uint32_t entry_len,
                                      uint32_t *entry_num, void *data,
                                      Error **errp)
{
    int ret, fd = be->fd;
    uint32_t total_entries = *entry_num;
    struct iommu_hwpt_invalidate cache = {
        .size = sizeof(cache),
        .hwpt_id = id,
        .data_type = data_type,
        .entry_len = entry_len,
        .entry_num = total_entries,
        .data_uptr = (uintptr_t)data,
    };

    ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
    trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
                                           total_entries, cache.entry_num,
                                           (uintptr_t)data, ret ? errno : 0);
    *entry_num = cache.entry_num;

    if (ret) {
        error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
                         " total %d entries, processed %d entries",
                         total_entries, cache.entry_num);
    } else if (total_entries != cache.entry_num) {
        error_setg(errp, "IOMMU_HWPT_INVALIDATE succeeded but with unprocessed"
                   " entries: total %d entries, processed %d entries."
                   " Kernel BUG?!", total_entries, cache.entry_num);
        return false;
    }

    return !ret;
}

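/*
 * Thin wrappers dispatching to the concrete HostIOMMUDeviceIOMMUFD
 * implementation (e.g. the VFIO one) of attach_hwpt/detach_hwpt.
 */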
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           uint32_t hwpt_id, Error **errp)
{
    HostIOMMUDeviceIOMMUFDClass *idevc =
        HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);

    g_assert(idevc->attach_hwpt);
    return idevc->attach_hwpt(idev, hwpt_id, errp);
}

bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           Error **errp)
{
    HostIOMMUDeviceIOMMUFDClass *idevc =
        HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);

    g_assert(idevc->detach_hwpt);
    return idevc->detach_hwpt(idev, errp);
}

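/* Capabilities common to all iommufd-backed host IOMMU devices. */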
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
    HostIOMMUDeviceCaps *caps = &hiod->caps;

    switch (cap) {
    case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
        return caps->type;
    case HOST_IOMMU_DEVICE_CAP_AW_BITS:
        return vfio_device_get_aw_bits(hiod->agent);
    default:
        error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
        return -EINVAL;
    }
}

static void hiod_iommufd_class_init(ObjectClass *oc, const void *data)
{
    HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);

    hioc->get_cap = hiod_iommufd_get_cap;
}

static const TypeInfo types[] = {
    {
        .name = TYPE_IOMMUFD_BACKEND,
        .parent = TYPE_OBJECT,
        .instance_size = sizeof(IOMMUFDBackend),
        .instance_init = iommufd_backend_init,
        .instance_finalize = iommufd_backend_finalize,
        .class_size = sizeof(IOMMUFDBackendClass),
        .class_init = iommufd_backend_class_init,
        .interfaces = (const InterfaceInfo[]) {
            { TYPE_USER_CREATABLE },
            { }
        }
    }, {
        .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
        .parent = TYPE_HOST_IOMMU_DEVICE,
        .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
        .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
        .class_init = hiod_iommufd_class_init,
        .abstract = true,
    }
};

DEFINE_TYPES(types)