/*
 * iommufd container backend
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "system/iommufd.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"
#include "trace.h"
#include "hw/vfio/vfio-device.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>

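/*
 * QOM instance init/finalize: a backend starts disconnected (fd = -1, no
 * users) and owning its fd; finalize only closes the fd when it is owned.
 */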
static void iommufd_backend_init(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

    be->fd = -1;
    be->users = 0;
    be->owned = true;
}

static void iommufd_backend_finalize(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

    if (be->owned) {
        close(be->fd);
        be->fd = -1;
    }
}

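/*
 * Setter for the "fd" property: accept a pre-opened /dev/iommu file
 * descriptor (resolved by name or number through the monitor).  The
 * backend then does not own the fd and will not close it on finalize.
 */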
static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
{
    ERRP_GUARD();
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
    int fd = -1;

    fd = monitor_fd_param(monitor_cur(), str, errp);
    if (fd == -1) {
        error_prepend(errp, "Could not parse remote object fd %s:", str);
        return;
    }
    be->fd = fd;
    be->owned = false;
    trace_iommu_backend_set_fd(be->fd);
}

static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);

    return !be->users;
}

static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->can_be_deleted = iommufd_backend_can_be_deleted;

    object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}

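/*
 * Take a reference on the backend.  When the backend owns its fd and this
 * is the first user, open /dev/iommu; later users share the same fd.
 */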
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
    int fd;

    if (be->owned && !be->users) {
        fd = qemu_open("/dev/iommu", O_RDWR, errp);
        if (fd < 0) {
            return false;
        }
        be->fd = fd;
    }
    be->users++;

    trace_iommufd_backend_connect(be->fd, be->owned, be->users);
    return true;
}

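/*
 * Drop a reference on the backend; close the fd when the last user goes
 * away, but only if the backend owns the fd.
 */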
void iommufd_backend_disconnect(IOMMUFDBackend *be)
{
    if (!be->users) {
        goto out;
    }
    be->users--;
    if (!be->users && be->owned) {
        close(be->fd);
        be->fd = -1;
    }
out:
    trace_iommufd_backend_disconnect(be->fd, be->users);
}

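/*
 * Allocate a new I/O address space (IOAS) object.  On success the new
 * object id is returned in *ioas_id.
 */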
bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
                                Error **errp)
{
    int fd = be->fd;
    struct iommu_ioas_alloc alloc_data = {
        .size = sizeof(alloc_data),
        .flags = 0,
    };

    if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) {
        error_setg_errno(errp, errno, "Failed to allocate ioas");
        return false;
    }

    *ioas_id = alloc_data.out_ioas_id;
    trace_iommufd_backend_alloc_ioas(fd, *ioas_id);

    return true;
}

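/*
 * Destroy an iommufd object (IOAS, HWPT, ...) by id.  Failures are only
 * reported, not propagated to the caller.
 */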
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
{
    int ret, fd = be->fd;
    struct iommu_destroy des = {
        .size = sizeof(des),
        .id = id,
    };

    ret = ioctl(fd, IOMMU_DESTROY, &des);
    trace_iommufd_backend_free_id(fd, id, ret);
    if (ret) {
        error_report("Failed to free id: %u %m", id);
    }
}

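/*
 * Map [iova, iova + size) in the IOAS to the host virtual range starting
 * at vaddr.  Mappings are always readable and, unless readonly is set,
 * also writable.  Returns 0 on success, -errno on failure.
 */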
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly)
{
    int ret, fd = be->fd;
    struct iommu_ioas_map map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_FIXED_IOVA,
        .ioas_id = ioas_id,
        .__reserved = 0,
        .user_va = (uintptr_t)vaddr,
        .iova = iova,
        .length = size,
    };

    if (!readonly) {
        map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
    }

    ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
    trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
                                  vaddr, readonly, ret);
    if (ret) {
        ret = -errno;

        /* TODO: mapping hardware PCI BAR regions is not supported yet. */
        if (errno == EFAULT) {
            warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
        }
    }
    return ret;
}

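/*
 * Unmap [iova, iova + size) from the IOAS.  Returns 0 on success (including
 * the ENOENT case explained below), -errno on failure.
 */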
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                              hwaddr iova, ram_addr_t size)
{
    int ret, fd = be->fd;
    struct iommu_ioas_unmap unmap = {
        .size = sizeof(unmap),
        .ioas_id = ioas_id,
        .iova = iova,
        .length = size,
    };

    ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
    /*
     * IOMMUFD treats each mapping as an object: unmapping a nonexistent
     * mapping is like destroying a nonexistent object and returns ENOENT.
     * This differs from the legacy backend, which allows it.  A vIOMMU may
     * trigger a lot of redundant unmaps, so to avoid flooding the log,
     * treat them as success for IOMMUFD just like the legacy backend does.
     */
    if (ret && errno == ENOENT) {
        trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
        ret = 0;
    } else {
        trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
    }

    if (ret) {
        ret = -errno;
    }
    return ret;
}

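/*
 * Allocate a hardware page table (HWPT) for device dev_id on top of the
 * parent object pt_id (an IOAS, or a parent HWPT for nested setups),
 * optionally passing a vendor-specific data blob.  On success the new
 * HWPT id is returned in *out_hwpt.
 */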
bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
                                uint32_t pt_id, uint32_t flags,
                                uint32_t data_type, uint32_t data_len,
                                void *data_ptr, uint32_t *out_hwpt,
                                Error **errp)
{
    int ret, fd = be->fd;
    struct iommu_hwpt_alloc alloc_hwpt = {
        .size = sizeof(struct iommu_hwpt_alloc),
        .flags = flags,
        .dev_id = dev_id,
        .pt_id = pt_id,
        .data_type = data_type,
        .data_len = data_len,
        .data_uptr = (uintptr_t)data_ptr,
    };

    ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
    trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
                                     data_len, (uintptr_t)data_ptr,
                                     alloc_hwpt.out_hwpt_id, ret);
    if (ret) {
        error_setg_errno(errp, errno, "Failed to allocate hwpt");
        return false;
    }

    *out_hwpt = alloc_hwpt.out_hwpt_id;
    return true;
}

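/* Enable (start == true) or disable dirty page tracking on a HWPT. */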
bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
                                        uint32_t hwpt_id, bool start,
                                        Error **errp)
{
    int ret;
    struct iommu_hwpt_set_dirty_tracking set_dirty = {
        .size = sizeof(set_dirty),
        .hwpt_id = hwpt_id,
        .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
    };

    ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
    trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
    if (ret) {
        error_setg_errno(errp, errno,
                         "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
                         hwpt_id);
        return false;
    }

    return true;
}

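/*
 * Read the dirty bitmap for [iova, iova + size) of a HWPT into the
 * caller-provided buffer, one bit per page of page_size bytes.
 */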
bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
                                      uint32_t hwpt_id,
                                      uint64_t iova, ram_addr_t size,
                                      uint64_t page_size, uint64_t *data,
                                      Error **errp)
{
    int ret;
    struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
        .size = sizeof(get_dirty_bitmap),
        .hwpt_id = hwpt_id,
        .iova = iova,
        .length = size,
        .page_size = page_size,
        .data = (uintptr_t)data,
    };

    ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
    trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
                                           page_size, ret ? errno : 0);
    if (ret) {
        error_setg_errno(errp, errno,
                         "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx
                         " size: 0x"RAM_ADDR_FMT") failed", iova, size);
        return false;
    }

    return true;
}

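/*
 * Query IOMMU hardware information for a device: the vendor-specific data
 * blob plus its type and capability flags.  type and caps must be non-NULL.
 */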
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
                                     uint32_t *type, void *data, uint32_t len,
                                     uint64_t *caps, Error **errp)
{
    struct iommu_hw_info info = {
        .size = sizeof(info),
        .dev_id = devid,
        .data_len = len,
        .data_uptr = (uintptr_t)data,
    };

    if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) {
        error_setg_errno(errp, errno, "Failed to get hardware info");
        return false;
    }

    g_assert(type);
    *type = info.out_data_type;
    g_assert(caps);
    *caps = info.out_capabilities;

    return true;
}

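/*
 * Issue IOMMU cache invalidation requests against object id (placed in
 * hwpt_id).  On return *entry_num holds the number of entries the kernel
 * actually processed.
 */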
bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
                                      uint32_t data_type, uint32_t entry_len,
                                      uint32_t *entry_num, void *data,
                                      Error **errp)
{
    int ret, fd = be->fd;
    uint32_t total_entries = *entry_num;
    struct iommu_hwpt_invalidate cache = {
        .size = sizeof(cache),
        .hwpt_id = id,
        .data_type = data_type,
        .entry_len = entry_len,
        .entry_num = total_entries,
        .data_uptr = (uintptr_t)data,
    };

    ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
    trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
                                           total_entries, cache.entry_num,
                                           (uintptr_t)data, ret ? errno : 0);
    *entry_num = cache.entry_num;

    if (ret) {
        error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
                         " total %d entries, processed %d entries",
                         total_entries, cache.entry_num);
    } else if (total_entries != cache.entry_num) {
        error_setg(errp, "IOMMU_HWPT_INVALIDATE succeeded but with unprocessed"
                   " entries: total %d entries, processed %d entries."
                   " Kernel BUG?!", total_entries, cache.entry_num);
        return false;
    }

    return !ret;
}

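/*
 * Thin wrappers dispatching to the HostIOMMUDeviceIOMMUFD class hooks that
 * attach the device to / detach it from a HWPT.
 */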
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           uint32_t hwpt_id, Error **errp)
{
    HostIOMMUDeviceIOMMUFDClass *idevc =
        HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);

    g_assert(idevc->attach_hwpt);
    return idevc->attach_hwpt(idev, hwpt_id, errp);
}

bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
                                           Error **errp)
{
    HostIOMMUDeviceIOMMUFDClass *idevc =
        HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);

    g_assert(idevc->detach_hwpt);
    return idevc->detach_hwpt(idev, errp);
}

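/*
 * Report generic host IOMMU device capabilities from the cached hiod->caps
 * (IOMMU type and usable address-width bits), or -EINVAL for unknown caps.
 */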
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
    HostIOMMUDeviceCaps *caps = &hiod->caps;

    switch (cap) {
    case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
        return caps->type;
    case HOST_IOMMU_DEVICE_CAP_AW_BITS:
        return vfio_device_get_aw_bits(hiod->agent);
    default:
        error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
        return -EINVAL;
    }
}

static void hiod_iommufd_class_init(ObjectClass *oc, const void *data)
{
    HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);

    hioc->get_cap = hiod_iommufd_get_cap;
}

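/*
 * QOM type registration: the user-creatable iommufd backend object and the
 * abstract HostIOMMUDeviceIOMMUFD type that concrete backends derive from.
 */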
static const TypeInfo types[] = {
    {
        .name = TYPE_IOMMUFD_BACKEND,
        .parent = TYPE_OBJECT,
        .instance_size = sizeof(IOMMUFDBackend),
        .instance_init = iommufd_backend_init,
        .instance_finalize = iommufd_backend_finalize,
        .class_size = sizeof(IOMMUFDBackendClass),
        .class_init = iommufd_backend_class_init,
        .interfaces = (const InterfaceInfo[]) {
            { TYPE_USER_CREATABLE },
            { }
        }
    }, {
        .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
        .parent = TYPE_HOST_IOMMU_DEVICE,
        .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
        .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
        .class_init = hiod_iommufd_class_init,
        .abstract = true,
    }
};

DEFINE_TYPES(types)