xref: /linux/include/uapi/linux/iommufd.h (revision f1d26d72f01556c787b1291729aa7a2ce37656a8)
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
3  */
4 #ifndef _UAPI_IOMMUFD_H
5 #define _UAPI_IOMMUFD_H
6 
7 #include <linux/ioctl.h>
8 #include <linux/types.h>
9 
10 #define IOMMUFD_TYPE (';')
11 
12 /**
13  * DOC: General ioctl format
14  *
15  * The ioctl interface follows a general format to allow for extensibility. Each
16  * ioctl is passed in a structure pointer as the argument providing the size of
17  * the structure in the first u32. The kernel checks that any structure space
18  * beyond what it understands is 0. This allows userspace to use the backward
19  * compatible portion while consistently using the newer, larger, structures.
20  *
21  * ioctls use a standard meaning for common errnos:
22  *
23  *  - ENOTTY: The IOCTL number itself is not supported at all
24  *  - E2BIG: The IOCTL number is supported, but the provided structure has
25  *    non-zero in a part the kernel does not understand.
26  *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
27  *    understood, however a known field has a value the kernel does not
28  *    understand or support.
29  *  - EINVAL: Everything about the IOCTL was understood, but a field is not
30  *    correct.
31  *  - ENOENT: An ID or IOVA provided does not exist.
32  *  - ENOMEM: Out of memory.
33  *  - EOVERFLOW: Mathematics overflowed.
34  *
35  * As well as additional errnos, within specific ioctls.
36  */
37 enum { /* ioctl command numbers: the second argument of each _IO() below */
38 	IOMMUFD_CMD_BASE = 0x80, /* lowest command number in this list */
39 	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, /* same value as _BASE */
40 	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
41 	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
42 	IOMMUFD_CMD_IOAS_COPY = 0x83,
43 	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
44 	IOMMUFD_CMD_IOAS_MAP = 0x85,
45 	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
46 	IOMMUFD_CMD_OPTION = 0x87,
47 	IOMMUFD_CMD_VFIO_IOAS = 0x88,
48 	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
49 	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
50 	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
51 	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
52 	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
53 	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
54 	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
55 	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
56 	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
57 	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
58 	IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
59 	IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
60 };
61 
62 /**
63  * struct iommu_destroy - ioctl(IOMMU_DESTROY)
64  * @size: sizeof(struct iommu_destroy)
65  * @id: iommufd object ID to destroy. Can be any destroyable object type.
66  *
67  * Destroy any object held within iommufd.
68  */
69 struct iommu_destroy {
70 	__u32 size;
71 	__u32 id;
72 };
73 #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
74 
75 /**
76  * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
77  * @size: sizeof(struct iommu_ioas_alloc)
78  * @flags: Must be 0
79  * @out_ioas_id: Output IOAS ID for the allocated object
80  *
81  * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
82  * to memory mapping.
83  */
84 struct iommu_ioas_alloc {
85 	__u32 size;
86 	__u32 flags;
87 	__u32 out_ioas_id;
88 };
89 #define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
90 
91 /**
92  * struct iommu_iova_range - An interval in IOVA space
93  * @start: First IOVA
94  * @last: Inclusive last IOVA
95  *
96  * Array element type of the allowed_iovas fields. Not an ioctl by itself.
97  */
98 struct iommu_iova_range {
99 	__aligned_u64 start;
100 	__aligned_u64 last;
101 };
102 
103 /**
104  * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
105  * @size: sizeof(struct iommu_ioas_iova_ranges)
106  * @ioas_id: IOAS ID to read ranges from
107  * @num_iovas: Input/Output total number of ranges in the IOAS
108  * @__reserved: Must be 0
109  * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
110  * @out_iova_alignment: Minimum alignment required for mapping IOVA
111  *
112  * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
113  * is not allowed. num_iovas will be set to the total number of iovas and
114  * the allowed_iovas[] will be filled in as space permits.
115  *
116  * The allowed ranges are dependent on the HW path the DMA operation takes, and
117  * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
118  * full range, and each attached device will narrow the ranges based on that
119  * device's HW restrictions. Detaching a device can widen the ranges. Userspace
120  * should query ranges after every attach/detach to know what IOVAs are valid
121  * for mapping.
122  *
123  * On input num_iovas is the length of the allowed_iovas array. On output it is
124  * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
125  * num_iovas to the required value if num_iovas is too small. In this case the
126  * caller should allocate a larger output array and re-issue the ioctl.
127  *
128  * out_iova_alignment returns the minimum IOVA alignment that can be given
129  * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy::
130  *
131  *   starting_iova % out_iova_alignment == 0
132  *   (starting_iova + length) % out_iova_alignment == 0
133  *
134  * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
135  * be higher than the system PAGE_SIZE.
136  */
137 struct iommu_ioas_iova_ranges {
138 	__u32 size;
139 	__u32 ioas_id;
140 	__u32 num_iovas;
141 	__u32 __reserved;
142 	__aligned_u64 allowed_iovas;
143 	__aligned_u64 out_iova_alignment;
144 };
145 #define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
146 
147 /**
148  * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
149  * @size: sizeof(struct iommu_ioas_allow_iovas)
150  * @ioas_id: IOAS ID to allow IOVAs from
151  * @num_iovas: Input/Output total number of ranges in the IOAS
152  * @__reserved: Must be 0
153  * @allowed_iovas: Pointer to array of struct iommu_iova_range
154  *
155  * Ensure a range of IOVAs are always available for allocation. If this call
156  * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
157  * that are narrower than the ranges provided here. This call will fail if
158  * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
159  *
160  * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
161  * devices are attached the IOVA will narrow based on the device restrictions.
162  * When an allowed range is specified any narrowing will be refused, ie device
163  * attachment can fail if the device requires limiting within the allowed range.
164  *
165  * Automatic IOVA allocation is also impacted by this call. MAP will only
166  * allocate within the allowed IOVAs if they are present.
167  *
168  * This call replaces the entire allowed list with the given list.
169  */
170 struct iommu_ioas_allow_iovas {
171 	__u32 size;
172 	__u32 ioas_id;
173 	__u32 num_iovas;
174 	__u32 __reserved;
175 	__aligned_u64 allowed_iovas;
176 };
177 #define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
178 
179 /**
180  * enum iommufd_ioas_map_flags - Flags for map and copy
181  * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
182  *                             IOVA to place the mapping at
183  * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
184  * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
185  */
186 enum iommufd_ioas_map_flags {
187 	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
188 	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
189 	IOMMU_IOAS_MAP_READABLE = 1 << 2,
190 };
191 
192 /**
193  * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
194  * @size: sizeof(struct iommu_ioas_map)
195  * @flags: Combination of enum iommufd_ioas_map_flags
196  * @ioas_id: IOAS ID to change the mapping of
197  * @__reserved: Must be 0
198  * @user_va: Userspace pointer to start mapping from
199  * @length: Number of bytes to map
200  * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
201  *        then this must be provided as input.
202  *
203  * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
204  * mapping will be established at iova, otherwise a suitable location based on
205  * the reserved and allowed lists will be automatically selected and returned in
206  * iova.
207  *
208  * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
209  * be unused, existing IOVA cannot be replaced.
210  */
211 struct iommu_ioas_map {
212 	__u32 size;
213 	__u32 flags;
214 	__u32 ioas_id;
215 	__u32 __reserved;
216 	__aligned_u64 user_va;
217 	__aligned_u64 length;
218 	__aligned_u64 iova;
219 };
220 #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
221 
222 /**
223  * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
224  * @size: sizeof(struct iommu_ioas_map_file)
225  * @flags: same as for iommu_ioas_map
226  * @ioas_id: same as for iommu_ioas_map
227  * @fd: the memfd to map
228  * @start: byte offset from start of file to map from
229  * @length: same as for iommu_ioas_map
230  * @iova: same as for iommu_ioas_map
231  *
232  * Set an IOVA mapping from a memfd file.  All other arguments and semantics
233  * match those of IOMMU_IOAS_MAP.
234  */
235 struct iommu_ioas_map_file {
236 	__u32 size;
237 	__u32 flags;
238 	__u32 ioas_id;
239 	__s32 fd;
240 	__aligned_u64 start;
241 	__aligned_u64 length;
242 	__aligned_u64 iova;
243 };
244 #define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
245 
246 /**
247  * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
248  * @size: sizeof(struct iommu_ioas_copy)
249  * @flags: Combination of enum iommufd_ioas_map_flags
250  * @dst_ioas_id: IOAS ID to change the mapping of
251  * @src_ioas_id: IOAS ID to copy from
252  * @length: Number of bytes to copy and map
253  * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
254  *            set then this must be provided as input.
255  * @src_iova: IOVA to start the copy
256  *
257  * Copy an already existing mapping from src_ioas_id and establish it in
258  * dst_ioas_id. The src iova/length must exactly match a range used with
259  * IOMMU_IOAS_MAP.
260  *
261  * This may be used to efficiently clone a subset of an IOAS to another, or as a
262  * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
263  * establishing equivalent new mappings, as internal resources are shared, and
264  * the kernel will pin the user memory only once.
265  */
266 struct iommu_ioas_copy {
267 	__u32 size;
268 	__u32 flags;
269 	__u32 dst_ioas_id;
270 	__u32 src_ioas_id;
271 	__aligned_u64 length;
272 	__aligned_u64 dst_iova;
273 	__aligned_u64 src_iova;
274 };
275 #define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
276 
277 /**
278  * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
279  * @size: sizeof(struct iommu_ioas_unmap)
280  * @ioas_id: IOAS ID to change the mapping of
281  * @iova: IOVA to start the unmapping at
282  * @length: Number of bytes to unmap, and return back the bytes unmapped
283  *
284  * Unmap an IOVA range. The iova/length must be a superset of a previously
285  * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
286  * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
287  * everything.
288  */
289 struct iommu_ioas_unmap {
290 	__u32 size;
291 	__u32 ioas_id;
292 	__aligned_u64 iova;
293 	__aligned_u64 length;
294 };
295 #define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
296 
297 /**
298  * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
299  *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
300  * @IOMMU_OPTION_RLIMIT_MODE:
301  *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
302  *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
303  *    based accounting. Global option, object_id must be 0
304  * @IOMMU_OPTION_HUGE_PAGES:
305  *    Value 1 (default) allows contiguous pages to be combined when generating
306  *    iommu mappings. Value 0 disables combining, everything is mapped to
307  *    PAGE_SIZE. This can be useful for benchmarking.  This is a per-IOAS
308  *    option, the object_id must be the IOAS ID.
309  */
310 enum iommufd_option {
311 	IOMMU_OPTION_RLIMIT_MODE = 0,
312 	IOMMU_OPTION_HUGE_PAGES = 1,
313 };
314 
315 /**
316  * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
317  *                           ioctl(IOMMU_OPTION_OP_GET)
318  * @IOMMU_OPTION_OP_SET: Set the option's value
319  * @IOMMU_OPTION_OP_GET: Get the option's value
320  */
321 enum iommufd_option_ops {
322 	IOMMU_OPTION_OP_SET = 0,
323 	IOMMU_OPTION_OP_GET = 1,
324 };
325 
326 /**
327  * struct iommu_option - iommu option multiplexer
328  * @size: sizeof(struct iommu_option)
329  * @option_id: One of enum iommufd_option
330  * @op: One of enum iommufd_option_ops
331  * @__reserved: Must be 0
332  * @object_id: ID of the object if required
333  * @val64: Option value to set or value returned on get
334  *
335  * Change a simple option value. This multiplexor allows controlling options
336  * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
337  * will return the current value.
338  */
339 struct iommu_option {
340 	__u32 size;
341 	__u32 option_id;
342 	__u16 op;
343 	__u16 __reserved;
344 	__u32 object_id;
345 	__aligned_u64 val64;
346 };
347 #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
348 
349 /**
350  * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
351  * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
352  * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
353  * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
354  */
355 enum iommufd_vfio_ioas_op {
356 	IOMMU_VFIO_IOAS_GET = 0,
357 	IOMMU_VFIO_IOAS_SET = 1,
358 	IOMMU_VFIO_IOAS_CLEAR = 2,
359 };
360 
361 /**
362  * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
363  * @size: sizeof(struct iommu_vfio_ioas)
364  * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
365  *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
366  * @op: One of enum iommufd_vfio_ioas_op
367  * @__reserved: Must be 0
368  *
369  * The VFIO compatibility support uses a single ioas because VFIO APIs do not
370  * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
371  * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
372  * compatibility ioas, either by taking what is already set, or auto creating
373  * one. From then on VFIO will continue to use that ioas and is not affected by
374  * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
375  */
376 struct iommu_vfio_ioas {
377 	__u32 size;
378 	__u32 ioas_id;
379 	__u16 op;
380 	__u16 __reserved;
381 };
382 #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
383 
384 /**
385  * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
386  * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
387  *                                the parent HWPT in a nesting configuration.
388  * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
389  *                                   enforced on device attachment
390  * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
391  *                             valid.
392  * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
393  *                          domain can be attached to any PASID on the device.
394  *                          Any domain attached to the non-PASID part of the
395  *                          device must also be flagged, otherwise attaching a
396  *                          PASID will be blocked.
397  *                          For the user that wants to attach PASID, ioas is
398  *                          not recommended for both the non-PASID part
399  *                          and PASID part of the device.
400  *                          If IOMMU does not support PASID it will return
401  *                          error (-EOPNOTSUPP).
402  */
403 enum iommufd_hwpt_alloc_flags {
404 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
405 	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
406 	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
407 	IOMMU_HWPT_ALLOC_PASID = 1 << 3,
408 };
409 
410 /**
411  * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
412  *                                entry attributes
413  * @IOMMU_VTD_S1_SRE: Supervisor request
414  * @IOMMU_VTD_S1_EAFE: Extended access enable
415  * @IOMMU_VTD_S1_WPE: Write protect enable
416  */
417 enum iommu_hwpt_vtd_s1_flags {
418 	IOMMU_VTD_S1_SRE = 1 << 0,
419 	IOMMU_VTD_S1_EAFE = 1 << 1,
420 	IOMMU_VTD_S1_WPE = 1 << 2,
421 };
422 
423 /**
424  * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
425  *                            info (IOMMU_HWPT_DATA_VTD_S1)
426  * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
427  * @pgtbl_addr: The base address of the stage-1 page table.
428  * @addr_width: The address width of the stage-1 page table
429  * @__reserved: Must be 0
430  */
431 struct iommu_hwpt_vtd_s1 {
432 	__aligned_u64 flags;
433 	__aligned_u64 pgtbl_addr;
434 	__u32 addr_width;
435 	__u32 __reserved;
436 };
437 
438 /**
439  * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
440  *                                (IOMMU_HWPT_DATA_ARM_SMMUV3)
441  *
442  * @ste: The first two double words of the user space Stream Table Entry for
443  *       the translation. Must be little-endian.
444  *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
445  *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
446  *       - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
447  *
448  * -EIO will be returned if @ste is not legal or contains any non-allowed field.
449  * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
450  * nested domain will translate the same as the nesting parent. The S1 will
451  * install a Context Descriptor Table pointing at userspace memory translated
452  * by the nesting parent.
453  *
454  * It's suggested to allocate a vDEVICE object carrying vSID and then re-attach
455  * the nested domain, as soon as the vSID is available in the VMM level:
456  *
457  * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the
458  *   allocated nested domain, as CD/ATS invalidations and vevents need a vSID.
459  * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain
460  *   attachment, to support a GBPA case where VM sets CR0.SMMUEN=0. However, if
461  *   VM sets CR0.SMMUEN=1 while missing a vDEVICE object, kernel would fail to
462  *   report events to the VM. E.g. F_TRANSLATION when guest STE.Cfg=abort.
463  */
464 struct iommu_hwpt_arm_smmuv3 {
465 	__aligned_le64 ste[2];
466 };
467 
468 /**
469  * struct iommu_hwpt_amd_guest - AMD IOMMU guest I/O page table data
470  *				 (IOMMU_HWPT_DATA_AMD_GUEST)
471  * @dte: Guest Device Table Entry (DTE)
472  */
473 struct iommu_hwpt_amd_guest {
474 	__aligned_u64 dte[4];
475 };
476 
477 /**
478  * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
479  * @IOMMU_HWPT_DATA_NONE: no data
480  * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
481  * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
482  * @IOMMU_HWPT_DATA_AMD_GUEST: AMD IOMMU guest page table
483  */
484 enum iommu_hwpt_data_type {
485 	IOMMU_HWPT_DATA_NONE = 0,
486 	IOMMU_HWPT_DATA_VTD_S1 = 1,
487 	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
488 	IOMMU_HWPT_DATA_AMD_GUEST = 3,
489 };
490 
491 /**
492  * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
493  * @size: sizeof(struct iommu_hwpt_alloc)
494  * @flags: Combination of enum iommufd_hwpt_alloc_flags
495  * @dev_id: The device to allocate this HWPT for
496  * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
497  * @out_hwpt_id: The ID of the new HWPT
498  * @__reserved: Must be 0
499  * @data_type: One of enum iommu_hwpt_data_type
500  * @data_len: Length of the type specific data
501  * @data_uptr: User pointer to the type specific data
502  * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if
503  *            IOMMU_HWPT_FAULT_ID_VALID is set in @flags.
504  * @__reserved2: Padding to 64-bit alignment. Must be 0.
505  *
506  * Explicitly allocate a hardware page table object. This is the same object
507  * type that is returned by iommufd_device_attach() and represents the
508  * underlying iommu driver's iommu_domain kernel object.
509  *
510  * A kernel-managed HWPT will be created with the mappings from the given
511  * IOAS via the @pt_id. The @data_type for this allocation must be set to
512  * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
513  * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
514  *
515  * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
516  * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
517  * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
518  * case, the @data_type must be set to a pre-defined type corresponding to an
519  * I/O page table type supported by the underlying IOMMU hardware. The device
520  * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
521  * instance.
522  *
523  * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
524  * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
525  * must be given.
526  */
527 struct iommu_hwpt_alloc {
528 	__u32 size;
529 	__u32 flags;
530 	__u32 dev_id;
531 	__u32 pt_id;
532 	__u32 out_hwpt_id;
533 	__u32 __reserved;
534 	__u32 data_type;
535 	__u32 data_len;
536 	__aligned_u64 data_uptr;
537 	__u32 fault_id;
538 	__u32 __reserved2;
539 };
540 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
541 
542 /**
543  * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
544  * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
545  *                                         on a nested_parent domain.
546  *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
547  */
548 enum iommu_hw_info_vtd_flags {
549 	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
550 };
551 
552 /**
553  * struct iommu_hw_info_vtd - Intel VT-d hardware information
554  *
555  * @flags: Combination of enum iommu_hw_info_vtd_flags
556  * @__reserved: Must be 0
557  *
558  * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
559  *           section 11.4.2 Capability Register.
560  * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d
561  *            spec section 11.4.3 Extended Capability Register.
562  *
563  * User needs to understand the Intel VT-d specification to decode the
564  * register value.
565  */
566 struct iommu_hw_info_vtd {
567 	__u32 flags;
568 	__u32 __reserved;
569 	__aligned_u64 cap_reg;
570 	__aligned_u64 ecap_reg;
571 };
572 
573 /**
574  * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
575  *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
576  *
577  * @flags: Must be set to 0
578  * @__reserved: Must be 0
579  * @idr: Implemented features for ARM SMMU Non-secure programming interface
580  * @iidr: Information about the implementation and implementer of ARM SMMU,
581  *        and architecture version supported
582  * @aidr: ARM SMMU architecture version
583  *
584  * For the details of @idr, @iidr and @aidr, please refer to the chapters
585  * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
586  *
587  * This reports the raw HW capability, and not all bits are meaningful to be
588  * read by userspace. Only the following fields should be used:
589  *
590  * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN , CD2L, ASID16, TTF
591  * idr[1]: SIDSIZE, SSIDSIZE
592  * idr[3]: BBML, RIL
593  * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
594  *
595  * - S1P should be assumed to be true if a NESTED HWPT can be created
596  * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be
597  *   true.
598  * - ATS is a per-device property. If the VMM describes any devices as ATS
599  *   capable in ACPI/DT it should set the corresponding idr.
600  *
601  * This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is
602  * important that VMMs do not read bits outside the list to allow for
603  * compatibility with future kernels. Several features in the SMMUv3
604  * architecture are not currently supported by the kernel for nesting: HTTU,
605  * BTM, MPAM and others.
606  */
607 struct iommu_hw_info_arm_smmuv3 {
608 	__u32 flags;
609 	__u32 __reserved;
610 	__u32 idr[6];
611 	__u32 iidr;
612 	__u32 aidr;
613 };
614 
615 /**
616  * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware
617  *         Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV)
618  *
619  * @flags: Must be 0
620  * @version: Version number for the CMDQ-V HW for PARAM bits[03:00]
621  * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04]
622  * @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12]
623  * @__reserved: Must be 0
624  *
625  * VMM can use these fields directly in its emulated global PARAM register. Note
626  * that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM
627  * bits[11:08] should be set to 0 for log2 of the total number of VINTFs.
628  */
629 struct iommu_hw_info_tegra241_cmdqv {
630 	__u32 flags;
631 	__u8 version;
632 	__u8 log2vcmdqs;
633 	__u8 log2vsids;
634 	__u8 __reserved;
635 };
636 
637 /**
638  * struct iommu_hw_info_amd - AMD IOMMU device info
639  *
640  * @efr : Value of AMD IOMMU Extended Feature Register (EFR)
641  * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
642  *
643  * Please see the description of these registers in the following sections of
644  * the AMD I/O Virtualization Technology (IOMMU) Specification.
645  * (https://docs.amd.com/v/u/en-US/48882_3.10_PUB)
646  *
647  * - MMIO Offset 0030h IOMMU Extended Feature Register
648  * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
649  *
650  * Note: The EFR and EFR2 are raw values reported by hardware.
651  * VMM is responsible to determine the appropriate flags to be exposed to
652  * the VM since certain features are not currently supported by the kernel
653  * for HW-vIOMMU.
654  *
655  * Current VMM-allowed list of feature flags are:
656  * - EFR[GTSup, GASup, GioSup, PPRSup, EPHSup, GATS, GLX, PASmax]
657  */
658 struct iommu_hw_info_amd {
659 	__aligned_u64 efr;
660 	__aligned_u64 efr2;
661 };
662 
663 /**
664  * enum iommu_hw_info_type - IOMMU Hardware Info Types
665  * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware
666  *                           info
667  * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type
668  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
669  * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
670  * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
671  *                                     SMMUv3) info type
672  * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type
673  */
674 enum iommu_hw_info_type {
675 	IOMMU_HW_INFO_TYPE_NONE = 0,
676 	IOMMU_HW_INFO_TYPE_DEFAULT = 0,
677 	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
678 	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
679 	IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,
680 	IOMMU_HW_INFO_TYPE_AMD = 4,
681 };
682 
683 /**
684  * enum iommufd_hw_capabilities - Bits of iommu_hw_info::out_capabilities
685  * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
686  *                               If available, it means the following APIs
687  *                               are supported:
688  *
689  *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
690  *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
691  *
692  * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it
693  *                               when the struct
694  *                               iommu_hw_info::out_max_pasid_log2 is zero.
695  * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
696  *                               when the struct
697  *                               iommu_hw_info::out_max_pasid_log2 is zero.
698  * @IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED: ATS is not supported or cannot be used
699  *                                      on this device (absence implies ATS
700  *                                      may be enabled)
701  */
702 enum iommufd_hw_capabilities {
703 	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
704 	IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
705 	IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
706 	IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED = 1 << 3,
707 };
708 
709 /**
710  * enum iommufd_hw_info_flags - Flags for iommu_hw_info
711  * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type
712  *                                 for user space to request for a specific info
713  */
714 enum iommufd_hw_info_flags {
715 	IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0,
716 };
717 
718 /**
719  * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
720  * @size: sizeof(struct iommu_hw_info)
721  * @flags: Combination of enum iommufd_hw_info_flags
722  * @dev_id: The device bound to the iommufd
723  * @data_len: Input the length of a user buffer in bytes. Output the length of
724  *            data that kernel supports
725  * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
726  *             the iommu type specific hardware information data
727  * @in_data_type: This shares the same field with @out_data_type, making it be
728  *                a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is
729  *                set, an input type carried via this @in_data_type field will
730  *                be valid, requesting for the info data to the given type. If
731  *                IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will
732  *                be seen as IOMMU_HW_INFO_TYPE_DEFAULT
733  * @out_data_type: Output the iommu hardware info type as defined in the enum
734  *                 iommu_hw_info_type.
735  * @out_capabilities: Output the generic iommu capability info type as defined
736  *                    in the enum iommufd_hw_capabilities.
737  * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
738  *                      PCI devices turn to out_capabilities to check if the
739  *                      specific capabilities are supported or not.
740  * @__reserved: Must be 0
741  *
742  * Query an iommu type specific hardware information data from an iommu behind
743  * a given device that has been bound to iommufd. This hardware info data will
744  * be used to sync capabilities between the virtual iommu and the physical
745  * iommu, e.g. a nested translation setup needs to check the hardware info, so
746  * a guest stage-1 page table can be compatible with the physical iommu.
747  *
748  * To capture an iommu type specific hardware information data, @data_uptr and
749  * its length @data_len must be provided. Trailing bytes will be zeroed if the
750  * user buffer is larger than the data that kernel has. Otherwise, kernel only
751  * fills the buffer using the given length in @data_len. If the ioctl succeeds,
752  * @data_len will be updated to the length that kernel actually supports,
753  * @out_data_type will be filled to decode the data filled in the buffer
754  * pointed by @data_uptr. Input @data_len == zero is allowed.
755  */
756 struct iommu_hw_info {
757 	__u32 size;
758 	__u32 flags;
759 	__u32 dev_id;
760 	__u32 data_len;
761 	__aligned_u64 data_uptr;
762 	union {
763 		__u32 in_data_type;
764 		__u32 out_data_type;
765 	};
766 	__u8 out_max_pasid_log2;
767 	__u8 __reserved[3];
768 	__aligned_u64 out_capabilities;
769 };
770 #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
771 
772 /**
773  * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
774  *                                              tracking
775  * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
776  */
777 enum iommufd_hwpt_set_dirty_tracking_flags {
778 	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
779 };
780 
781 /**
782  * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
783  * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
784  * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
785  * @hwpt_id: HW pagetable ID that represents the IOMMU domain
786  * @__reserved: Must be 0
787  *
788  * Toggle dirty tracking on the HW pagetable identified by @hwpt_id.
789  */
790 struct iommu_hwpt_set_dirty_tracking {
791 	__u32 size;
792 	__u32 flags;
793 	__u32 hwpt_id;
794 	__u32 __reserved;
795 };
796 #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
797 					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
798 
799 /**
800  * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
801  * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
802  *                                        any dirty bits metadata. This flag
803  *                                        can be passed when the next
804  *                                        operation is expected to be an unmap
805  *                                        of the same IOVA range.
806  *
807  */
808 enum iommufd_hwpt_get_dirty_bitmap_flags {
809 	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
810 };
811 
812 /**
813  * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
814  * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
815  * @hwpt_id: HW pagetable ID that represents the IOMMU domain
816  * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
817  * @__reserved: Must be 0
818  * @iova: base IOVA of the bitmap first bit
819  * @length: IOVA range size
820  * @page_size: page size granularity of each bit in the bitmap
821  * @data: bitmap where to set the dirty bits. Each bit represents one
822  *        @page_size unit; the bit for an arbitrary iova is found as below.
823  *
824  * Checking whether a given IOVA is dirty:
825  *
826  *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
827  *
828  * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
829  * with the dirty IOVAs. In doing so it will also by default clear any
830  * dirty bit metadata set in the IOPTE.
831  */
832 struct iommu_hwpt_get_dirty_bitmap {
833 	__u32 size;
834 	__u32 hwpt_id;
835 	__u32 flags;
836 	__u32 __reserved;
837 	__aligned_u64 iova;
838 	__aligned_u64 length;
839 	__aligned_u64 page_size;
840 	__aligned_u64 data;
841 };
842 #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
843 					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
844 
845 /**
846  * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
847  *                                        Data Type
848  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for Intel VT-d stage-1
849  * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
850  */
851 enum iommu_hwpt_invalidate_data_type {
852 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
853 	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
854 };
855 
856 /**
857  * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
858  *                                           stage-1 cache invalidation
859  * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
860  *                            to the page-structure caches of all levels or
861  *                            just to the leaf PTE cache.
862  */
863 enum iommu_hwpt_vtd_s1_invalidate_flags {
864 	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
865 };
866 
867 /**
868  * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
869  *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
870  * @addr: The start address of the range to be invalidated. It needs to
871  *        be 4KB aligned.
872  * @npages: Number of contiguous 4K pages to be invalidated.
873  * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
874  * @__reserved: Must be 0
875  *
876  * The Intel VT-d specific invalidation data for user-managed stage-1 cache
877  * invalidation in nested translation. Userspace uses this structure to
878  * describe the impacted cache scope after modifying the stage-1 page table.
879  *
880  * To invalidate all the caches related to the page table, set @addr to 0
881  * and @npages to U64_MAX.
882  *
883  * The device TLB will be invalidated automatically if ATS is enabled.
884  */
885 struct iommu_hwpt_vtd_s1_invalidate {
886 	__aligned_u64 addr;
887 	__aligned_u64 npages;
888 	__u32 flags;
889 	__u32 __reserved;
890 };
891 
892 /**
893  * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
894  *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
895  * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
896  *       Must be little-endian.
897  *
898  * Supported command list only when passing in a vIOMMU via @hwpt_id:
899  *     CMDQ_OP_TLBI_NSNH_ALL
900  *     CMDQ_OP_TLBI_NH_VA
901  *     CMDQ_OP_TLBI_NH_VAA
902  *     CMDQ_OP_TLBI_NH_ALL
903  *     CMDQ_OP_TLBI_NH_ASID
904  *     CMDQ_OP_ATC_INV
905  *     CMDQ_OP_CFGI_CD
906  *     CMDQ_OP_CFGI_CD_ALL
907  *
908  * -EIO will be returned if the command is not supported.
909  */
910 struct iommu_viommu_arm_smmuv3_invalidate {
911 	__aligned_le64 cmd[2];	/* 2 x 64 bits = one 128-bit command */
912 };
913 
914 /**
915  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
916  * @size: sizeof(struct iommu_hwpt_invalidate)
917  * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
918  * @data_uptr: User pointer to an array of driver-specific cache invalidation
919  *             data.
920  * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
921  *             type of all the entries in the invalidation request array. It
922  *             should be a type supported by the hwpt pointed by @hwpt_id.
923  * @entry_len: Length (in bytes) of a request entry in the request array
924  * @entry_num: Input the number of cache invalidation requests in the array.
925  *             Output the number of requests successfully handled by kernel.
926  * @__reserved: Must be 0.
927  *
928  * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
929  * on a user-managed page table should be followed by this operation, if a HWPT
930  * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
931  * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
932  *
933  * Each ioctl can support one or more cache invalidation requests in the array
934  * that has a total size of @entry_len * @entry_num.
935  *
936  * An empty invalidation request array by setting @entry_num==0 is allowed, and
937  * @entry_len and @data_uptr are ignored in this case. This can be used to
938  * check whether the given @data_type is supported by the kernel or not.
939  */
940 struct iommu_hwpt_invalidate {
941 	__u32 size;
942 	__u32 hwpt_id;
943 	__aligned_u64 data_uptr;
944 	__u32 data_type;
945 	__u32 entry_len;
946 	__u32 entry_num;
947 	__u32 __reserved;
948 };
949 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
950 
951 /**
952  * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
953  * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
954  *                                   valid.
955  * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: This is the last fault of a fault group.
956  */
957 enum iommu_hwpt_pgfault_flags {
958 	IOMMU_PGFAULT_FLAGS_PASID_VALID		= (1 << 0),
959 	IOMMU_PGFAULT_FLAGS_LAST_PAGE		= (1 << 1),
960 };
961 
962 /**
963  * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
964  * @IOMMU_PGFAULT_PERM_READ: Request for read permission
965  * @IOMMU_PGFAULT_PERM_WRITE: Request for write permission
966  * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) Request with a PASID that has the
967  *                           Execute Requested bit set in PASID TLP Prefix.
968  * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) Request with a PASID that has the
969  *                           Privileged Mode Requested bit set in PASID TLP
970  *                           Prefix.
971  */
972 enum iommu_hwpt_pgfault_perm {
973 	IOMMU_PGFAULT_PERM_READ			= (1 << 0),
974 	IOMMU_PGFAULT_PERM_WRITE		= (1 << 1),
975 	IOMMU_PGFAULT_PERM_EXEC			= (1 << 2),
976 	IOMMU_PGFAULT_PERM_PRIV			= (1 << 3),
977 };
978 
979 /**
980  * struct iommu_hwpt_pgfault - iommu page fault data
981  * @flags: Combination of enum iommu_hwpt_pgfault_flags
982  * @dev_id: ID of the originating device
983  * @pasid: Process Address Space ID
984  * @grpid: Page Request Group Index
985  * @perm: Combination of enum iommu_hwpt_pgfault_perm
986  * @__reserved: Must be 0.
987  * @addr: Fault address
988  * @length: a hint of how much data the requestor is expecting to fetch. For
989  *          example, if the PRI initiator knows it is going to do a 10MB
990  *          transfer, it could fill in 10MB and the OS could pre-fault in
991  *          10MB of IOVA. It defaults to 0 if there's no such hint.
992  * @cookie: kernel-managed cookie identifying a group of fault messages. The
993  *          cookie number encoded in the last page fault of the group should
994  *          be echoed back in the response message.
995  */
996 struct iommu_hwpt_pgfault {
997 	__u32 flags;
998 	__u32 dev_id;
999 	__u32 pasid;
1000 	__u32 grpid;
1001 	__u32 perm;
1002 	__u32 __reserved;
1003 	__aligned_u64 addr;
1004 	__u32 length;
1005 	__u32 cookie;
1006 };
1007 
1008 /**
1009  * enum iommufd_page_response_code - Return status of fault handlers
1010  * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
1011  *                             populated, retry the access. This is the
1012  *                             "Success" defined in PCI 10.4.2.1.
1013  * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
1014  *                             access. This is the "Invalid Request" defined
1015  *                             in PCI 10.4.2.1.
1016  */
1017 enum iommufd_page_response_code {
1018 	IOMMUFD_PAGE_RESP_SUCCESS = 0,
1019 	IOMMUFD_PAGE_RESP_INVALID = 1,
1020 };
1021 
1022 /**
1023  * struct iommu_hwpt_page_response - IOMMU page fault response
1024  * @cookie: The kernel-managed cookie reported in the fault message.
1025  * @code: One of the response codes in enum iommufd_page_response_code.
1026  */
1027 struct iommu_hwpt_page_response {
1028 	__u32 cookie;
1029 	__u32 code;
1030 };
1031 
1032 /**
1033  * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
1034  * @size: sizeof(struct iommu_fault_alloc)
1035  * @flags: Must be 0
1036  * @out_fault_id: The ID of the new fault object
1037  * @out_fault_fd: The fd of the new fault object
1038  *
1039  * Explicitly allocate a fault handling object.
1040  */
1041 struct iommu_fault_alloc {
1042 	__u32 size;
1043 	__u32 flags;
1044 	__u32 out_fault_id;
1045 	__u32 out_fault_fd;
1046 };
1047 #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
1048 
1049 /**
1050  * enum iommu_viommu_type - Virtual IOMMU Type
1051  * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
1052  * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
1053  * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
1054  *                                    SMMUv3) enabled ARM SMMUv3 type
1055  */
1056 enum iommu_viommu_type {
1057 	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
1058 	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
1059 	/*
1060 	 * TEGRA241_CMDQV requirements (otherwise, VCMDQs will not work)
1061 	 * - Kernel will allocate a VINTF (HYP_OWN=0) to back this vIOMMU. So,
1062 	 *   VMM must wire the HYP_OWN bit to 0 in guest VINTF_CONFIG register
1063 	 */
1064 	IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
1065 };
1066 
1067 /**
1068  * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface
1069  *                                      (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
1070  * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0
1071  * @out_vintf_mmap_length: mmap length argument for VINTF's page0
1072  *
1073  * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by kernel
1074  * for user space to mmap the VINTF page0 from the host physical address space
1075  * to the guest physical address space so that a guest kernel can directly R/W
1076  * access the VINTF page0 in order to control its virtual command queues.
1077  */
1078 struct iommu_viommu_tegra241_cmdqv {
1079 	__aligned_u64 out_vintf_mmap_offset;
1080 	__aligned_u64 out_vintf_mmap_length;
1081 };
1082 
1083 /**
1084  * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
1085  * @size: sizeof(struct iommu_viommu_alloc)
1086  * @flags: Must be 0
1087  * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
1088  * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
1089  * @hwpt_id: ID of a nesting parent HWPT to associate to
1090  * @out_viommu_id: Output virtual IOMMU ID for the allocated object
1091  * @data_len: Length of the type specific data
1092  * @__reserved: Must be 0
1093  * @data_uptr: User pointer to driver-specific virtual IOMMU data
1094  *
1095  * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
1096  * virtualization support that is a security-isolated slice of the real IOMMU HW
1097  * that is unique to a specific VM. Operations global to the IOMMU are connected
1098  * to the vIOMMU, such as:
1099  * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
1100  * - Non-device-affiliated event reporting, e.g. invalidation queue errors
1101  * - Access to a sharable nesting parent pagetable across physical IOMMUs
1102  * - Virtualization of various platform IDs, e.g. RIDs and others
1103  * - Delivery of paravirtualized invalidation
1104  * - Direct assigned invalidation queues
1105  * - Direct assigned interrupts
1106  */
1107 struct iommu_viommu_alloc {
1108 	__u32 size;
1109 	__u32 flags;
1110 	__u32 type;
1111 	__u32 dev_id;
1112 	__u32 hwpt_id;
1113 	__u32 out_viommu_id;
1114 	__u32 data_len;
1115 	__u32 __reserved;
1116 	__aligned_u64 data_uptr;
1117 };
1118 #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
1119 
1120 /**
1121  * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
1122  * @size: sizeof(struct iommu_vdevice_alloc)
1123  * @viommu_id: vIOMMU ID to associate with the virtual device
1124  * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
1125  * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
1126  * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
1127  *           of AMD IOMMU, and vRID of Intel VT-d
1128  *
1129  * Allocate a virtual device instance (for a physical device) against a vIOMMU.
1130  * This instance holds the device's information (related to its vIOMMU) in a VM.
1131  * User should use IOMMU_DESTROY to destroy the virtual device before
1132  * destroying the physical device (by closing vfio_cdev fd). Otherwise the
1133  * virtual device would be forcibly destroyed on physical device destruction,
1134  * and its vdevice_id would be permanently leaked (unremovable & unreusable)
1135  * until the iommu fd is closed.
1136  */
1137 struct iommu_vdevice_alloc {
1138 	__u32 size;
1139 	__u32 viommu_id;
1140 	__u32 dev_id;
1141 	__u32 out_vdevice_id;
1142 	__aligned_u64 virt_id;
1143 };
1144 #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
1145 
1146 /**
1147  * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
1148  * @size: sizeof(struct iommu_ioas_change_process)
1149  * @__reserved: Must be 0
1150  *
1151  * This transfers pinned memory counts for every memory map in every IOAS
1152  * in the context to the current process.  This only supports maps created
1153  * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
1154  * If the ioctl returns a failure status, then nothing is changed.
1155  *
1156  * This API is useful for transferring operation of a device from one process
1157  * to another, such as during userland live update.
1158  */
1159 struct iommu_ioas_change_process {
1160 	__u32 size;
1161 	__u32 __reserved;
1162 };
1163 
1164 #define IOMMU_IOAS_CHANGE_PROCESS \
1165 	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
1166 
1167 /**
1168  * enum iommu_veventq_flag - Flags for struct iommufd_vevent_header
1169  * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs
1170  */
1171 enum iommu_veventq_flag {
1172 	IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0),
1173 };
1174 
1175 /**
1176  * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status
1177  * @flags: Combination of enum iommu_veventq_flag
1178  * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of
1179  *            [0, INT_MAX] where the following index of INT_MAX is 0
1180  *
1181  * Each iommufd_vevent_header reports a sequence index of the following vEVENT:
1182  *
1183  * +----------------------+-------+----------------------+-------+---+-------+
1184  * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN |
1185  * +----------------------+-------+----------------------+-------+---+-------+
1186  *
1187  * And this sequence index is expected to be monotonic to the sequence index of
1188  * the previous vEVENT. If two adjacent sequence indexes have a delta larger
1189  * than 1, it means delta - 1 vEVENTs have been lost, e.g. two lost vEVENTs:
1190  *
1191  * +-----+----------------------+-------+----------------------+-------+-----+
1192  * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... |
1193  * +-----+----------------------+-------+----------------------+-------+-----+
1194  *
1195  * If a vEVENT is lost at the tail of the vEVENTQ and there is no following
1196  * vEVENT providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
1197  * header would be added to the tail, and no data would follow this header:
1198  *
1199  * +--+----------------------+-------+-----------------------------------------+
1200  * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} |
1201  * +--+----------------------+-------+-----------------------------------------+
1202  */
1203 struct iommufd_vevent_header {
1204 	__u32 flags;
1205 	__u32 sequence;
1206 };
1207 
1208 /**
1209  * enum iommu_veventq_type - Virtual Event Queue Type
1210  * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
1211  * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue
1212  * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ
1213  */
1214 enum iommu_veventq_type {
1215 	IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
1216 	IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1,	/* struct iommu_vevent_arm_smmuv3 */
1217 	IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2,	/* struct iommu_vevent_tegra241_cmdqv */
1218 };
1219 
1220 /**
1221  * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event
1222  *                                  (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3)
1223  * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
1224  *       Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec)
1225  *       - 0x04 C_BAD_STE
1226  *       - 0x06 F_STREAM_DISABLED
1227  *       - 0x08 C_BAD_SUBSTREAMID
1228  *       - 0x0a C_BAD_CD
1229  *       - 0x10 F_TRANSLATION
1230  *       - 0x11 F_ADDR_SIZE
1231  *       - 0x12 F_ACCESS
1232  *       - 0x13 F_PERMISSION
1233  *
1234  * StreamID field reports a virtual device ID. To receive a virtual event for a
1235  * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC.
1236  */
1237 struct iommu_vevent_arm_smmuv3 {
1238 	__aligned_le64 evt[4];	/* 4 x 64 bits = one 256-bit event record */
1239 };
1240 
1241 /**
1242  * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ
1243  *                                      (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV)
1244  * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian.
1245  *                  (Refer to register LVCMDQ_ERR_MAPs per VINTF)
1246  *
1247  * The 128-bit register value from HW exclusively reflects the error bits for a
1248  * Virtual Interface represented by a vIOMMU object. Read and report directly.
1249  */
1250 struct iommu_vevent_tegra241_cmdqv {
1251 	__aligned_le64 lvcmdq_err_map[2];
1252 };
1253 
1254 /**
1255  * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
1256  * @size: sizeof(struct iommu_veventq_alloc)
1257  * @flags: Must be 0
1258  * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
1259  * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
1260  * @veventq_depth: Maximum number of events in the vEVENTQ
1261  * @out_veventq_id: The ID of the new vEVENTQ
1262  * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
1263  *                  successfully returned fd after using it
1264  * @__reserved: Must be 0
1265  *
1266  * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
1267  * can have multiple FDs for different types, but is confined to one per @type.
1268  * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ,
1269  * if there are vEVENTs available. A vEVENTQ will lose events due to overflow,
1270  * if the number of the vEVENTs hits @veventq_depth.
1271  *
1272  * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by
1273  * a type-specific data structure, in a normal case:
1274  *
1275  * +-+---------+-------+---------+-------+-----+---------+-------+-+
1276  * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | |
1277  * +-+---------+-------+---------+-------+-----+---------+-------+-+
1278  *
1279  * unless a trailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to
1280  * struct iommufd_vevent_header).
1281  */
1282 struct iommu_veventq_alloc {
1283 	__u32 size;
1284 	__u32 flags;
1285 	__u32 viommu_id;
1286 	__u32 type;
1287 	__u32 veventq_depth;
1288 	__u32 out_veventq_id;
1289 	__u32 out_veventq_fd;
1290 	__u32 __reserved;
1291 };
1292 #define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)
1293 
1294 /**
1295  * enum iommu_hw_queue_type - HW Queue Type
1296  * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use
1297  * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
1298  *                                      SMMUv3) Virtual Command Queue (VCMDQ)
1299  */
1300 enum iommu_hw_queue_type {
1301 	IOMMU_HW_QUEUE_TYPE_DEFAULT = 0,
1302 	/*
1303 	 * TEGRA241_CMDQV requirements (otherwise, allocation will fail)
1304 	 * - alloc starts from the lowest @index=0 in ascending order
1305 	 * - destroy starts from the last allocated @index in descending order
1306 	 * - @nesting_parent_iova must be @length-aligned and mapped in IOAS
1307 	 * - @length must be a power of 2, with a minimum 32 bytes and a maximum
1308 	 *   2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1]
1309 	 *   from struct iommu_hw_info_arm_smmuv3)
1310 	 * - suggest to back the queue memory with contiguous physical pages or
1311 	 *   a single huge page with alignment of the queue size, and limit the
1312 	 *   emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes)
1313 	 */
1314 	IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1,
1315 };
1316 
1317 /**
1318  * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC)
1319  * @size: sizeof(struct iommu_hw_queue_alloc)
1320  * @flags: Must be 0
1321  * @viommu_id: Virtual IOMMU ID to associate the HW queue with
1322  * @type: One of enum iommu_hw_queue_type
1323  * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue
1324  *         model
1325  * @out_hw_queue_id: The ID of the new HW queue
1326  * @nesting_parent_iova: Base address of the queue memory in the guest physical
1327  *                       address space
1328  * @length: Length of the queue memory
1329  *
1330  * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which
1331  * allows HW to access guest queue memory described using @nesting_parent_iova
1332  * and @length.
1333  *
1334  * A vIOMMU can allocate multiple queues, but it must use a different @index per
1335  * type to separate each allocation, e.g::
1336  *
1337  *     Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ...
1338  */
1339 struct iommu_hw_queue_alloc {
1340 	__u32 size;
1341 	__u32 flags;
1342 	__u32 viommu_id;
1343 	__u32 type;
1344 	__u32 index;
1345 	__u32 out_hw_queue_id;
1346 	__aligned_u64 nesting_parent_iova;
1347 	__aligned_u64 length;
1348 };
1349 #define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC)
1350 #endif
1351