xref: /qemu/linux-headers/linux/iommufd.h (revision 513823e7521a09ed7ad1e32e6454bac3b2cbf52d)
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
3  */
4 #ifndef _IOMMUFD_H
5 #define _IOMMUFD_H
6 
7 #include <linux/ioctl.h>
8 #include <linux/types.h>
9 
10 #define IOMMUFD_TYPE (';')
11 
12 /**
13  * DOC: General ioctl format
14  *
15  * The ioctl interface follows a general format to allow for extensibility. Each
16  * ioctl is passed in a structure pointer as the argument providing the size of
17  * the structure in the first u32. The kernel checks that any structure space
18  * beyond what it understands is 0. This allows userspace to use the backward
19  * compatible portion while consistently using the newer, larger, structures.
20  *
21  * ioctls use a standard meaning for common errnos:
22  *
23  *  - ENOTTY: The IOCTL number itself is not supported at all
24  *  - E2BIG: The IOCTL number is supported, but the provided structure has
25  *    non-zero in a part the kernel does not understand.
26  *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
27  *    understood, however a known field has a value the kernel does not
28  *    understand or support.
29  *  - EINVAL: Everything about the IOCTL was understood, but a field is not
30  *    correct.
31  *  - ENOENT: An ID or IOVA provided does not exist.
32  *  - ENOMEM: Out of memory.
33  *  - EOVERFLOW: Mathematics overflowed.
34  *
35  * As well as additional errnos, within specific ioctls.
36  */
37 enum {	/* command numbers: the nr argument of the IOMMU_* _IO() macros */
38 	IOMMUFD_CMD_BASE = 0x80,
39 	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,	/* 0x80, shares the base value */
40 	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
41 	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
42 	IOMMUFD_CMD_IOAS_COPY = 0x83,
43 	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
44 	IOMMUFD_CMD_IOAS_MAP = 0x85,
45 	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
46 	IOMMUFD_CMD_OPTION = 0x87,
47 	IOMMUFD_CMD_VFIO_IOAS = 0x88,
48 	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
49 	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
50 	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
51 	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
52 	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
53 	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
54 	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
55 	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
56 	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
57 	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
58 };
59 
60 /**
61  * struct iommu_destroy - ioctl(IOMMU_DESTROY)
62  * @size: sizeof(struct iommu_destroy)
63  * @id: iommufd object ID to destroy. Can be any destroyable object type.
64  *
65  * Destroy any object held within iommufd.
66  */
67 struct iommu_destroy {
68 	__u32 size;
69 	__u32 id;
70 };
71 #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) /* nr 0x80 */
72 
73 /**
74  * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
75  * @size: sizeof(struct iommu_ioas_alloc)
76  * @flags: Must be 0
77  * @out_ioas_id: Output IOAS ID for the allocated object
78  *
79  * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
80  * to memory mapping.
81  */
82 struct iommu_ioas_alloc {
83 	__u32 size;
84 	__u32 flags;
85 	__u32 out_ioas_id;
86 };
87 #define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC) /* nr 0x81 */
88 
89 /**
90  * struct iommu_iova_range - Element of the allowed_iovas arrays
91  * @start: First IOVA
92  * @last: Inclusive last IOVA
93  *
94  * An interval in IOVA space.
95  */
96 struct iommu_iova_range {
97 	__aligned_u64 start;
98 	__aligned_u64 last;
99 };
100 
101 /**
102  * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
103  * @size: sizeof(struct iommu_ioas_iova_ranges)
104  * @ioas_id: IOAS ID to read ranges from
105  * @num_iovas: Input/Output total number of ranges in the IOAS
106  * @__reserved: Must be 0
107  * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
108  * @out_iova_alignment: Minimum alignment required for mapping IOVA
109  *
110  * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
111  * is not allowed. num_iovas will be set to the total number of iovas and
112  * the allowed_iovas[] will be filled in as space permits.
113  *
114  * The allowed ranges are dependent on the HW path the DMA operation takes, and
115  * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
116  * full range, and each attached device will narrow the ranges based on that
117  * device's HW restrictions. Detaching a device can widen the ranges. Userspace
118  * should query ranges after every attach/detach to know what IOVAs are valid
119  * for mapping.
120  *
121  * On input num_iovas is the length of the allowed_iovas array. On output it is
122  * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
123  * num_iovas to the required value if num_iovas is too small. In this case the
124  * caller should allocate a larger output array and re-issue the ioctl.
125  *
126  * out_iova_alignment returns the minimum IOVA alignment that can be given
127  * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
128  *
129  *   starting_iova % out_iova_alignment == 0
130  *   (starting_iova + length) % out_iova_alignment == 0
131  *
132  * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
133  * be higher than the system PAGE_SIZE.
134  */
135 struct iommu_ioas_iova_ranges {
136 	__u32 size;
137 	__u32 ioas_id;
138 	__u32 num_iovas;
139 	__u32 __reserved;
140 	__aligned_u64 allowed_iovas;
141 	__aligned_u64 out_iova_alignment;
142 };
143 #define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES) /* nr 0x84 */
144 
145 /**
146  * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
147  * @size: sizeof(struct iommu_ioas_allow_iovas)
148  * @ioas_id: IOAS ID to allow IOVAs from
149  * @num_iovas: Input/Output total number of ranges in the IOAS
150  * @__reserved: Must be 0
151  * @allowed_iovas: Pointer to array of struct iommu_iova_range
152  *
153  * Ensure a range of IOVAs is always available for allocation. If this call
154  * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
155  * that are narrower than the ranges provided here. This call will fail if
156  * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
157  *
158  * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
159  * devices are attached the IOVA will narrow based on the device restrictions.
160  * When an allowed range is specified any narrowing will be refused, i.e. device
161  * attachment can fail if the device requires limiting within the allowed range.
162  *
163  * Automatic IOVA allocation is also impacted by this call. MAP will only
164  * allocate within the allowed IOVAs if they are present.
165  *
166  * This call replaces the entire allowed list with the given list.
167  */
168 struct iommu_ioas_allow_iovas {
169 	__u32 size;
170 	__u32 ioas_id;
171 	__u32 num_iovas;
172 	__u32 __reserved;
173 	__aligned_u64 allowed_iovas;
174 };
175 #define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS) /* nr 0x82 */
176 
177 /**
178  * enum iommufd_ioas_map_flags - Flags for map and copy
179  * @IOMMU_IOAS_MAP_FIXED_IOVA: If set, map at the caller-provided IOVA; if
180  *                             clear, the kernel will compute an appropriate one
181  * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
182  * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
183  */
184 enum iommufd_ioas_map_flags {
185 	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
186 	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
187 	IOMMU_IOAS_MAP_READABLE = 1 << 2,
188 };
189 
190 /**
191  * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
192  * @size: sizeof(struct iommu_ioas_map)
193  * @flags: Combination of enum iommufd_ioas_map_flags
194  * @ioas_id: IOAS ID to change the mapping of
195  * @__reserved: Must be 0
196  * @user_va: Userspace pointer to start mapping from
197  * @length: Number of bytes to map
198  * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
199  *        then this must be provided as input.
200  *
201  * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
202  * mapping will be established at iova, otherwise a suitable location based on
203  * the reserved and allowed lists will be automatically selected and returned in
204  * iova.
205  *
206  * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
207  * be unused; an existing IOVA cannot be replaced.
208  */
209 struct iommu_ioas_map {
210 	__u32 size;
211 	__u32 flags;
212 	__u32 ioas_id;
213 	__u32 __reserved;
214 	__aligned_u64 user_va;
215 	__aligned_u64 length;
216 	__aligned_u64 iova;
217 };
218 #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) /* nr 0x85 */
219 
220 /**
221  * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
222  * @size: sizeof(struct iommu_ioas_map_file)
223  * @flags: same as for iommu_ioas_map
224  * @ioas_id: same as for iommu_ioas_map
225  * @fd: the memfd to map (takes the slot of __reserved in iommu_ioas_map)
226  * @start: byte offset from start of file to map from
227  * @length: same as for iommu_ioas_map
228  * @iova: same as for iommu_ioas_map
229  *
230  * Set an IOVA mapping from a memfd file.  All other arguments and semantics
231  * match those of IOMMU_IOAS_MAP.
232  */
233 struct iommu_ioas_map_file {
234 	__u32 size;
235 	__u32 flags;
236 	__u32 ioas_id;
237 	__s32 fd;
238 	__aligned_u64 start;
239 	__aligned_u64 length;
240 	__aligned_u64 iova;
241 };
242 #define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE) /* nr 0x8f */
243 
244 /**
245  * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
246  * @size: sizeof(struct iommu_ioas_copy)
247  * @flags: Combination of enum iommufd_ioas_map_flags
248  * @dst_ioas_id: IOAS ID to change the mapping of
249  * @src_ioas_id: IOAS ID to copy from
250  * @length: Number of bytes to copy and map
251  * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
252  *            set then this must be provided as input.
253  * @src_iova: IOVA to start the copy
254  *
255  * Copy an already existing mapping from src_ioas_id and establish it in
256  * dst_ioas_id. The src iova/length must exactly match a range used with
257  * IOMMU_IOAS_MAP.
258  *
259  * This may be used to efficiently clone a subset of an IOAS to another, or as a
260  * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
261  * establishing equivalent new mappings, as internal resources are shared, and
262  * the kernel will pin the user memory only once.
263  */
264 struct iommu_ioas_copy {
265 	__u32 size;
266 	__u32 flags;
267 	__u32 dst_ioas_id;
268 	__u32 src_ioas_id;
269 	__aligned_u64 length;
270 	__aligned_u64 dst_iova;
271 	__aligned_u64 src_iova;
272 };
273 #define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY) /* nr 0x83 */
274 
275 /**
276  * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
277  * @size: sizeof(struct iommu_ioas_unmap)
278  * @ioas_id: IOAS ID to change the mapping of
279  * @iova: IOVA to start the unmapping at
280  * @length: Number of bytes to unmap; on return, the number of bytes unmapped
281  *
282  * Unmap an IOVA range. The iova/length must be a superset of a previously
283  * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
284  * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
285  * everything.
286  */
287 struct iommu_ioas_unmap {
288 	__u32 size;
289 	__u32 ioas_id;
290 	__aligned_u64 iova;
291 	__aligned_u64 length;
292 };
293 #define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP) /* nr 0x86 */
294 
295 /**
296  * enum iommufd_option - Options selectable via the option_id field of
297  *                       struct iommu_option
298  * @IOMMU_OPTION_RLIMIT_MODE:
299  *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
300  *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
301  *    based accounting. Global option, object_id must be 0
302  * @IOMMU_OPTION_HUGE_PAGES:
303  *    Value 1 (default) allows contiguous pages to be combined when generating
304  *    iommu mappings. Value 0 disables combining, everything is mapped to
305  *    PAGE_SIZE. This can be useful for benchmarking.  This is a per-IOAS
306  *    option, the object_id must be the IOAS ID.
307  */
308 enum iommufd_option {
309 	IOMMU_OPTION_RLIMIT_MODE = 0,
310 	IOMMU_OPTION_HUGE_PAGES = 1,
311 };
312 
313 /**
314  * enum iommufd_option_ops - Operations selectable via the op field of
315  *                           struct iommu_option
316  * @IOMMU_OPTION_OP_SET: Set the option's value
317  * @IOMMU_OPTION_OP_GET: Get the option's value
318  */
319 enum iommufd_option_ops {
320 	IOMMU_OPTION_OP_SET = 0,
321 	IOMMU_OPTION_OP_GET = 1,
322 };
323 
324 /**
325  * struct iommu_option - iommu option multiplexer
326  * @size: sizeof(struct iommu_option)
327  * @option_id: One of enum iommufd_option
328  * @op: One of enum iommufd_option_ops
329  * @__reserved: Must be 0
330  * @object_id: ID of the object if required
331  * @val64: Option value to set or value returned on get
332  *
333  * Change a simple option value. This multiplexer allows controlling options
334  * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
335  * will return the current value.
336  */
337 struct iommu_option {
338 	__u32 size;
339 	__u32 option_id;
340 	__u16 op;
341 	__u16 __reserved;
342 	__u32 object_id;
343 	__aligned_u64 val64;
344 };
345 #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) /* nr 0x87 */
346 
347 /**
348  * enum iommufd_vfio_ioas_op - op values for struct iommu_vfio_ioas
349  * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
350  * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
351  * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
352  */
353 enum iommufd_vfio_ioas_op {
354 	IOMMU_VFIO_IOAS_GET = 0,
355 	IOMMU_VFIO_IOAS_SET = 1,
356 	IOMMU_VFIO_IOAS_CLEAR = 2,
357 };
358 
359 /**
360  * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
361  * @size: sizeof(struct iommu_vfio_ioas)
362  * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
363  *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
364  * @op: One of enum iommufd_vfio_ioas_op
365  * @__reserved: Must be 0
366  *
367  * The VFIO compatibility support uses a single ioas because VFIO APIs do not
368  * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
369  * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
370  * compatibility ioas, either by taking what is already set, or auto creating
371  * one. From then on VFIO will continue to use that ioas and is not affected by
372  * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
373  */
374 struct iommu_vfio_ioas {
375 	__u32 size;
376 	__u32 ioas_id;
377 	__u16 op;
378 	__u16 __reserved;
379 };
380 #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) /* nr 0x88 */
381 
382 /**
383  * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
384  * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
385  *                                the parent HWPT in a nesting configuration.
386  * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
387  *                                   enforced on device attachment
388  * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
389  *                             valid.
390  * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
391  *                          domain can be attached to any PASID on the device.
392  *                          Any domain attached to the non-PASID part of the
393  *                          device must also be flagged, otherwise attaching a
394  *                          PASID will be blocked.
395  *                          If IOMMU does not support PASID it will return
396  *                          error (-EOPNOTSUPP).
397  */
398 enum iommufd_hwpt_alloc_flags {
399 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
400 	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
401 	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
402 	IOMMU_HWPT_ALLOC_PASID = 1 << 3,
403 };
404 
405 /**
406  * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table entry
407  *                                attributes (flags of struct iommu_hwpt_vtd_s1)
408  * @IOMMU_VTD_S1_SRE: Supervisor request
409  * @IOMMU_VTD_S1_EAFE: Extended access enable
410  * @IOMMU_VTD_S1_WPE: Write protect enable
411  */
412 enum iommu_hwpt_vtd_s1_flags {
413 	IOMMU_VTD_S1_SRE = 1 << 0,
414 	IOMMU_VTD_S1_EAFE = 1 << 1,
415 	IOMMU_VTD_S1_WPE = 1 << 2,
416 };
417 
418 /**
419  * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
420  *                            info (IOMMU_HWPT_DATA_VTD_S1)
421  * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
422  * @pgtbl_addr: The base address of the stage-1 page table
423  * @addr_width: The address width of the stage-1 page table
424  * @__reserved: Must be 0
425  */
426 struct iommu_hwpt_vtd_s1 {
427 	__aligned_u64 flags;
428 	__aligned_u64 pgtbl_addr;
429 	__u32 addr_width;
430 	__u32 __reserved;
431 };
432 
433 /**
434  * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
435  *                                (IOMMU_HWPT_DATA_ARM_SMMUV3)
436  *
437  * @ste: The first two double words of the user space Stream Table Entry for
438  *       the translation. Must be little-endian.
439  *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
440  *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
441  *       - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
442  *
443  * -EIO will be returned if @ste is not legal or contains any non-allowed field.
444  * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
445  * nested domain will translate the same as the nesting parent. The S1 will
446  * install a Context Descriptor Table pointing at userspace memory translated
447  * by the nesting parent.
448  */
449 struct iommu_hwpt_arm_smmuv3 {
450 	__aligned_le64 ste[2];	/* ste[0] is word-0, ste[1] is word-1 */
451 };
452 
453 /**
454  * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
455  * @IOMMU_HWPT_DATA_NONE: no data
456  * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
457  * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
458  */
459 enum iommu_hwpt_data_type {
460 	IOMMU_HWPT_DATA_NONE = 0,
461 	IOMMU_HWPT_DATA_VTD_S1 = 1,	/* struct iommu_hwpt_vtd_s1 */
462 	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,	/* struct iommu_hwpt_arm_smmuv3 */
463 };
464 
465 /**
466  * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
467  * @size: sizeof(struct iommu_hwpt_alloc)
468  * @flags: Combination of enum iommufd_hwpt_alloc_flags
469  * @dev_id: The device to allocate this HWPT for
470  * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
471  * @out_hwpt_id: The ID of the new HWPT
472  * @__reserved: Must be 0
473  * @data_type: One of enum iommu_hwpt_data_type
474  * @data_len: Length of the type specific data
475  * @data_uptr: User pointer to the type specific data
476  * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if
477  *            IOMMU_HWPT_FAULT_ID_VALID is set in @flags.
478  * @__reserved2: Padding to 64-bit alignment. Must be 0.
479  *
480  * Explicitly allocate a hardware page table object. This is the same object
481  * type that is returned by iommufd_device_attach() and represents the
482  * underlying iommu driver's iommu_domain kernel object.
483  *
484  * A kernel-managed HWPT will be created with the mappings from the given
485  * IOAS via the @pt_id. The @data_type for this allocation must be set to
486  * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
487  * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
488  *
489  * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
490  * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
491  * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
492  * case, the @data_type must be set to a pre-defined type corresponding to an
493  * I/O page table type supported by the underlying IOMMU hardware. The device
494  * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
495  * instance.
496  *
497  * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
498  * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
499  * must be given.
500  */
501 struct iommu_hwpt_alloc {
502 	__u32 size;
503 	__u32 flags;
504 	__u32 dev_id;
505 	__u32 pt_id;
506 	__u32 out_hwpt_id;
507 	__u32 __reserved;
508 	__u32 data_type;
509 	__u32 data_len;
510 	__aligned_u64 data_uptr;
511 	__u32 fault_id;
512 	__u32 __reserved2;
513 };
514 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) /* nr 0x89 */
515 
516 /**
517  * enum iommu_hw_info_vtd_flags - Flags for struct iommu_hw_info_vtd
518  * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
519  *                                         on a nested_parent domain.
520  *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
521  */
522 enum iommu_hw_info_vtd_flags {
523 	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
524 };
525 
526 /**
527  * struct iommu_hw_info_vtd - Intel VT-d hardware information
528  *
529  * @flags: Combination of enum iommu_hw_info_vtd_flags
530  * @__reserved: Must be 0
531  *
532  * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
533  *           section 11.4.2 Capability Register.
534  * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d
535  *            spec section 11.4.3 Extended Capability Register.
536  *
537  * User needs to understand the Intel VT-d specification to decode the
538  * register value.
539  */
540 struct iommu_hw_info_vtd {
541 	__u32 flags;
542 	__u32 __reserved;
543 	__aligned_u64 cap_reg;
544 	__aligned_u64 ecap_reg;
545 };
546 
547 /**
548  * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
549  *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
550  *
551  * @flags: Must be set to 0
552  * @__reserved: Must be 0
553  * @idr: Implemented features for ARM SMMU Non-secure programming interface
554  * @iidr: Information about the implementation and implementer of ARM SMMU,
555  *        and architecture version supported
556  * @aidr: ARM SMMU architecture version
557  *
558  * For the details of @idr, @iidr and @aidr, please refer to the chapters
559  * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
560  *
561  * User space should read the underlying ARM SMMUv3 hardware information for
562  * the list of supported features.
563  *
564  * Note that these values reflect the raw HW capability, without any insight if
565  * any required kernel driver support is present. Bits may be set indicating the
566  * HW has functionality that is lacking kernel software support, such as BTM. If
567  * a VMM is using this information to construct emulated copies of these
568  * registers it should only forward bits that it knows it can support.
569  *
570  * In the future, presence of required kernel support will be indicated in
571  * @flags.
572  */
573 struct iommu_hw_info_arm_smmuv3 {
574 	__u32 flags;
575 	__u32 __reserved;
576 	__u32 idr[6];
577 	__u32 iidr;
578 	__u32 aidr;
579 };
579 
580 /**
581  * enum iommu_hw_info_type - IOMMU Hardware Info Types
582  * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
583  *                           info
584  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
585  * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
586  */
587 enum iommu_hw_info_type {
588 	IOMMU_HW_INFO_TYPE_NONE = 0,
589 	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,	/* presumably struct iommu_hw_info_vtd */
590 	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,	/* struct iommu_hw_info_arm_smmuv3 */
591 };
592 
593 /**
594  * enum iommufd_hw_capabilities - Generic IOMMU hardware capability bits
595  * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
596  *                               If available, it means the following APIs
597  *                               are supported:
598  *
599  *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
600  *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
601  *
602  */
603 enum iommufd_hw_capabilities {
604 	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
605 };
606 
607 /**
608  * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
609  * @size: sizeof(struct iommu_hw_info)
610  * @flags: Must be 0
611  * @dev_id: The device bound to the iommufd
612  * @data_len: Input the length of a user buffer in bytes. Output the length of
613  *            data that kernel supports
614  * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
615  *             the iommu type specific hardware information data
616  * @out_data_type: Output the iommu hardware info type as defined in the enum
617  *                 iommu_hw_info_type.
618  * @out_capabilities: Output the generic iommu capability info type as defined
619  *                    in the enum iommufd_hw_capabilities.
620  * @__reserved: Must be 0
621  *
622  * Query an iommu type specific hardware information data from an iommu behind
623  * a given device that has been bound to iommufd. This hardware info data will
624  * be used to sync capabilities between the virtual iommu and the physical
625  * iommu, e.g. a nested translation setup needs to check the hardware info, so
626  * a guest stage-1 page table can be compatible with the physical iommu.
627  *
628  * To capture an iommu type specific hardware information data, @data_uptr and
629  * its length @data_len must be provided. Trailing bytes will be zeroed if the
630  * user buffer is larger than the data that kernel has. Otherwise, kernel only
631  * fills the buffer using the given length in @data_len. If the ioctl succeeds,
632  * @data_len will be updated to the length that kernel actually supports,
633  * @out_data_type will be filled to decode the data filled in the buffer
634  * pointed by @data_uptr. Input @data_len == zero is allowed.
635  */
636 struct iommu_hw_info {
637 	__u32 size;
638 	__u32 flags;
639 	__u32 dev_id;
640 	__u32 data_len;
641 	__aligned_u64 data_uptr;
642 	__u32 out_data_type;
643 	__u32 __reserved;
644 	__aligned_u64 out_capabilities;
645 };
646 #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) /* nr 0x8a */
647 
648 /**
649  * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
650  *                                              tracking
651  * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
652  */
653 enum iommufd_hwpt_set_dirty_tracking_flags {
654 	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
655 };
656 
657 /**
658  * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
659  * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
660  * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
661  * @hwpt_id: HW pagetable ID that represents the IOMMU domain
662  * @__reserved: Must be 0
663  *
664  * Toggle dirty tracking on an HW pagetable.
665  */
666 struct iommu_hwpt_set_dirty_tracking {
667 	__u32 size;
668 	__u32 flags;
669 	__u32 hwpt_id;
670 	__u32 __reserved;
671 };
672 #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
673 					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) /* nr 0x8b */
674 
675 /**
676  * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
677  * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
678  *                                        any dirty bits metadata. This flag
679  *                                        can be passed when the next operation
680  *                                        is expected to be an unmap of the
681  *                                        same IOVA range.
682  *
683  */
684 enum iommufd_hwpt_get_dirty_bitmap_flags {
685 	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
686 };
687 
688 /**
689  * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
690  * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
691  * @hwpt_id: HW pagetable ID that represents the IOMMU domain
692  * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
693  * @__reserved: Must be 0
694  * @iova: base IOVA of the bitmap first bit
695  * @length: IOVA range size
696  * @page_size: page size granularity of each bit in the bitmap
697  * @data: bitmap where to set the dirty bits. The bitmap bits each
698  *        represent a page_size which you deviate from an arbitrary iova.
699  *
700  * Checking a given IOVA is dirty:
701  *
702  *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
703  *
704  * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
705  * with the dirty IOVAs. In doing so it will also by default clear any dirty
706  * bit metadata set in the IOPTE (see IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR).
707  */
708 struct iommu_hwpt_get_dirty_bitmap {
709 	__u32 size;
710 	__u32 hwpt_id;
711 	__u32 flags;
712 	__u32 __reserved;
713 	__aligned_u64 iova;
714 	__aligned_u64 length;
715 	__aligned_u64 page_size;
716 	__aligned_u64 data;
717 };
718 #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
719 					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) /* nr 0x8c */
720 
721 /**
722  * enum iommu_hwpt_invalidate_data_type - Cache invalidation data types for
723  *                                        struct iommu_hwpt_invalidate
724  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
725  * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
726  */
727 enum iommu_hwpt_invalidate_data_type {
728 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
729 	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
730 };
731 
732 /**
733  * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for struct
734  *                                           iommu_hwpt_vtd_s1_invalidate
735  * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
736  *                            to all-levels page structure cache or just
737  *                            the leaf PTE cache.
738  */
739 enum iommu_hwpt_vtd_s1_invalidate_flags {
740 	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
741 };
742 
743 /**
744  * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
745  *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
746  * @addr: The start address of the range to be invalidated. It needs to
747  *        be 4KB aligned.
748  * @npages: Number of contiguous 4K pages to be invalidated.
749  * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
750  * @__reserved: Must be 0
751  *
752  * The Intel VT-d specific invalidation data for user-managed stage-1 cache
753  * invalidation in nested translation. Userspace uses this structure to
754  * tell the impacted cache scope after modifying the stage-1 page table.
755  *
756  * To invalidate all the caches related to the page table, set @addr
757  * to 0 and @npages to U64_MAX.
758  *
759  * The device TLB will be invalidated automatically if ATS is enabled.
760  */
761 struct iommu_hwpt_vtd_s1_invalidate {
762 	__aligned_u64 addr;
763 	__aligned_u64 npages;
764 	__u32 flags;
765 	__u32 __reserved;
766 };
767 
768 /**
769  * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
770  *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
771  * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
772  *       Must be little-endian.
773  *
774  * Supported command list only when passing in a vIOMMU via @hwpt_id:
775  *     CMDQ_OP_TLBI_NSNH_ALL
776  *     CMDQ_OP_TLBI_NH_VA
777  *     CMDQ_OP_TLBI_NH_VAA
778  *     CMDQ_OP_TLBI_NH_ALL
779  *     CMDQ_OP_TLBI_NH_ASID
780  *     CMDQ_OP_ATC_INV
781  *     CMDQ_OP_CFGI_CD
782  *     CMDQ_OP_CFGI_CD_ALL
783  *
784  * -EIO will be returned if the command is not supported.
785  */
786 struct iommu_viommu_arm_smmuv3_invalidate {
787 	__aligned_le64 cmd[2];
788 };
789 
790 /**
791  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
792  * @size: sizeof(struct iommu_hwpt_invalidate)
793  * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
794  * @data_uptr: User pointer to an array of driver-specific cache invalidation
795  *             data.
796  * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
797  *             type of all the entries in the invalidation request array. It
798  *             should be a type supported by the hwpt pointed by @hwpt_id.
799  * @entry_len: Length (in bytes) of a request entry in the request array
800  * @entry_num: Input the number of cache invalidation requests in the array.
801  *             Output the number of requests successfully handled by kernel.
802  * @__reserved: Must be 0.
803  *
804  * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
805  * on a user-managed page table should be followed by this operation, if a HWPT
806  * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
807  * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
808  *
809  * Each ioctl can support one or more cache invalidation requests in the array
810  * that has a total size of @entry_len * @entry_num.
811  *
812  * An empty invalidation request array by setting @entry_num==0 is allowed, and
813  * @entry_len and @data_uptr would be ignored in this case. This can be used to
814  * check if the given @data_type is supported or not by kernel.
815  */
816 struct iommu_hwpt_invalidate {
817 	__u32 size;
818 	__u32 hwpt_id;
819 	__aligned_u64 data_uptr;
820 	__u32 data_type;
821 	__u32 entry_len;
822 	__u32 entry_num;
823 	__u32 __reserved;
824 };
825 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) /* nr 0x8d */
826 
827 /**
828  * enum iommu_hwpt_pgfault_flags - bits for @flags of struct iommu_hwpt_pgfault
829  * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
830  *                                   valid.
831  * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: This is the last fault of a fault group.
832  */
833 enum iommu_hwpt_pgfault_flags {
834 	IOMMU_PGFAULT_FLAGS_PASID_VALID		= (1 << 0),
835 	IOMMU_PGFAULT_FLAGS_LAST_PAGE		= (1 << 1),
836 };
837 
838 /**
839  * enum iommu_hwpt_pgfault_perm - bits for @perm of struct iommu_hwpt_pgfault
840  * @IOMMU_PGFAULT_PERM_READ: Request for read permission
841  * @IOMMU_PGFAULT_PERM_WRITE: Request for write permission
842  * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) Request with a PASID that has the
843  *                           Execute Requested bit set in PASID TLP Prefix.
844  * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) Request with a PASID that has the
845  *                           Privileged Mode Requested bit set in PASID TLP
846  *                           Prefix.
847  */
848 enum iommu_hwpt_pgfault_perm {
849 	IOMMU_PGFAULT_PERM_READ			= (1 << 0),
850 	IOMMU_PGFAULT_PERM_WRITE		= (1 << 1),
851 	IOMMU_PGFAULT_PERM_EXEC			= (1 << 2),
852 	IOMMU_PGFAULT_PERM_PRIV			= (1 << 3),
853 };
854 
/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: ID of the originating device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Must be 0. Explicit padding that fills the hole between @perm
 *              and the 8-byte aligned @addr, so the structure has no
 *              compiler-inserted padding.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there's no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 *
 * @addr is declared __aligned_u64, like the 64-bit members of the other
 * structures in this header, so that its offset does not depend on the ABI's
 * natural alignment of __u64. Where __u64 is already 8-byte aligned the layout
 * is unchanged from the previous declaration (plain __u64 @addr, no
 * @__reserved). Where __u64 is only 4-byte aligned, @addr moves from offset 20
 * to offset 24.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};
881 
882 /**
883  * enum iommufd_page_response_code - Return status of fault handlers
884  * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
885  *                             populated; retry the access. This is the
886  *                             "Success" defined in PCI 10.4.2.1.
887  * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault; do not retry the
888  *                             access. This is the "Invalid Request" defined
889  *                             in PCI 10.4.2.1.
890  */
891 enum iommufd_page_response_code {
892 	IOMMUFD_PAGE_RESP_SUCCESS = 0,
893 	IOMMUFD_PAGE_RESP_INVALID = 1,
894 };
895 
896 /**
897  * struct iommu_hwpt_page_response - IOMMU page fault response
898  * @cookie: The kernel-managed cookie reported in the fault message.
899  * @code: One of the response codes in enum iommufd_page_response_code.
900  */
901 struct iommu_hwpt_page_response {
902 	__u32 cookie;
903 	__u32 code;
904 };
905 
906 /**
907  * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
908  * @size: sizeof(struct iommu_fault_alloc)
909  * @flags: Must be 0
910  * @out_fault_id: Output ID of the new fault object
911  * @out_fault_fd: Output fd of the new fault object
912  *
913  * Explicitly allocate a fault handling object.
914  */
915 struct iommu_fault_alloc {
916 	__u32 size;
917 	__u32 flags;
918 	__u32 out_fault_id;
919 	__u32 out_fault_fd;
920 };
921 #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
922 
923 /**
924  * enum iommu_viommu_type - Virtual IOMMU type (@type of iommu_viommu_alloc)
925  * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
926  * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
927  */
928 enum iommu_viommu_type {
929 	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
930 	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
931 };
932 
933 /**
934  * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
935  * @size: sizeof(struct iommu_viommu_alloc)
936  * @flags: Must be 0
937  * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
938  * @dev_id: ID of the device whose physical IOMMU will back the virtual IOMMU
939  * @hwpt_id: ID of a nesting parent HWPT to associate with
940  * @out_viommu_id: Output virtual IOMMU ID for the allocated object
941  *
942  * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
943  * virtualization support that is a security-isolated slice of the real IOMMU HW
944  * that is unique to a specific VM. Operations global to the IOMMU are connected
945  * to the vIOMMU, such as:
946  * - Security namespace for guest-owned IDs, e.g. guest-controlled cache tags
947  * - Non-device-affiliated event reporting, e.g. invalidation queue errors
948  * - Access to a sharable nesting parent pagetable across physical IOMMUs
949  * - Virtualization of various platform IDs, e.g. RIDs and others
950  * - Delivery of paravirtualized invalidation
951  * - Direct assigned invalidation queues
952  * - Direct assigned interrupts
953  */
954 struct iommu_viommu_alloc {
955 	__u32 size;
956 	__u32 flags;
957 	__u32 type;
958 	__u32 dev_id;
959 	__u32 hwpt_id;
960 	__u32 out_viommu_id;
961 };
962 #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
963 
964 /**
965  * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
966  * @size: sizeof(struct iommu_vdevice_alloc)
967  * @viommu_id: vIOMMU ID to associate with the virtual device
968  * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
969  * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
970  * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
971  *           of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table
972  *
973  * Allocate a virtual device instance (for a physical device) against a vIOMMU.
974  * This instance holds the device's information (related to its vIOMMU) in a VM.
975  */
976 struct iommu_vdevice_alloc {
977 	__u32 size;
978 	__u32 viommu_id;
979 	__u32 dev_id;
980 	__u32 out_vdevice_id;
981 	__aligned_u64 virt_id;
982 };
983 #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
984 
985 /**
986  * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
987  * @size: sizeof(struct iommu_ioas_change_process)
988  * @__reserved: Must be 0
989  *
990  * This transfers pinned memory counts for every memory map in every IOAS
991  * in the context to the current process. This only supports maps created
992  * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
993  * If the ioctl returns a failure status, then nothing is changed.
994  *
995  * This API is useful for transferring operation of a device from one process
996  * to another, such as during userland live update.
997  */
998 struct iommu_ioas_change_process {
999 	__u32 size;
1000 	__u32 __reserved;
1001 };
1002 
1003 #define IOMMU_IOAS_CHANGE_PROCESS \
1004 	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
1005 
1006 #endif
1007