1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 3 */ 4 #ifndef _IOMMUFD_H 5 #define _IOMMUFD_H 6 7 #include <linux/ioctl.h> 8 #include <linux/types.h> 9 10 #define IOMMUFD_TYPE (';') 11 12 /** 13 * DOC: General ioctl format 14 * 15 * The ioctl interface follows a general format to allow for extensibility. Each 16 * ioctl is passed in a structure pointer as the argument providing the size of 17 * the structure in the first u32. The kernel checks that any structure space 18 * beyond what it understands is 0. This allows userspace to use the backward 19 * compatible portion while consistently using the newer, larger, structures. 20 * 21 * ioctls use a standard meaning for common errnos: 22 * 23 * - ENOTTY: The IOCTL number itself is not supported at all 24 * - E2BIG: The IOCTL number is supported, but the provided structure has 25 * non-zero in a part the kernel does not understand. 26 * - EOPNOTSUPP: The IOCTL number is supported, and the structure is 27 * understood, however a known field has a value the kernel does not 28 * understand or support. 29 * - EINVAL: Everything about the IOCTL was understood, but a field is not 30 * correct. 31 * - ENOENT: An ID or IOVA provided does not exist. 32 * - ENOMEM: Out of memory. 33 * - EOVERFLOW: Mathematics overflowed. 34 * 35 * As well as additional errnos, within specific ioctls. 
36 */ 37 enum { 38 IOMMUFD_CMD_BASE = 0x80, 39 IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, 40 IOMMUFD_CMD_IOAS_ALLOC = 0x81, 41 IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, 42 IOMMUFD_CMD_IOAS_COPY = 0x83, 43 IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, 44 IOMMUFD_CMD_IOAS_MAP = 0x85, 45 IOMMUFD_CMD_IOAS_UNMAP = 0x86, 46 IOMMUFD_CMD_OPTION = 0x87, 47 IOMMUFD_CMD_VFIO_IOAS = 0x88, 48 IOMMUFD_CMD_HWPT_ALLOC = 0x89, 49 IOMMUFD_CMD_GET_HW_INFO = 0x8a, 50 IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, 51 IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, 52 IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, 53 IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, 54 IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, 55 IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, 56 IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, 57 IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, 58 }; 59 60 /** 61 * struct iommu_destroy - ioctl(IOMMU_DESTROY) 62 * @size: sizeof(struct iommu_destroy) 63 * @id: iommufd object ID to destroy. Can be any destroyable object type. 64 * 65 * Destroy any object held within iommufd. 66 */ 67 struct iommu_destroy { 68 __u32 size; 69 __u32 id; 70 }; 71 #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) 72 73 /** 74 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC) 75 * @size: sizeof(struct iommu_ioas_alloc) 76 * @flags: Must be 0 77 * @out_ioas_id: Output IOAS ID for the allocated object 78 * 79 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA) 80 * to memory mapping. 81 */ 82 struct iommu_ioas_alloc { 83 __u32 size; 84 __u32 flags; 85 __u32 out_ioas_id; 86 }; 87 #define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC) 88 89 /** 90 * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE) 91 * @start: First IOVA 92 * @last: Inclusive last IOVA 93 * 94 * An interval in IOVA space. 
95 */ 96 struct iommu_iova_range { 97 __aligned_u64 start; 98 __aligned_u64 last; 99 }; 100 101 /** 102 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES) 103 * @size: sizeof(struct iommu_ioas_iova_ranges) 104 * @ioas_id: IOAS ID to read ranges from 105 * @num_iovas: Input/Output total number of ranges in the IOAS 106 * @__reserved: Must be 0 107 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range 108 * @out_iova_alignment: Minimum alignment required for mapping IOVA 109 * 110 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges 111 * is not allowed. num_iovas will be set to the total number of iovas and 112 * the allowed_iovas[] will be filled in as space permits. 113 * 114 * The allowed ranges are dependent on the HW path the DMA operation takes, and 115 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a 116 * full range, and each attached device will narrow the ranges based on that 117 * device's HW restrictions. Detaching a device can widen the ranges. Userspace 118 * should query ranges after every attach/detach to know what IOVAs are valid 119 * for mapping. 120 * 121 * On input num_iovas is the length of the allowed_iovas array. On output it is 122 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set 123 * num_iovas to the required value if num_iovas is too small. In this case the 124 * caller should allocate a larger output array and re-issue the ioctl. 125 * 126 * out_iova_alignment returns the minimum IOVA alignment that can be given 127 * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy:: 128 * 129 * starting_iova % out_iova_alignment == 0 130 * (starting_iova + length) % out_iova_alignment == 0 131 * 132 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot 133 * be higher than the system PAGE_SIZE. 
134 */ 135 struct iommu_ioas_iova_ranges { 136 __u32 size; 137 __u32 ioas_id; 138 __u32 num_iovas; 139 __u32 __reserved; 140 __aligned_u64 allowed_iovas; 141 __aligned_u64 out_iova_alignment; 142 }; 143 #define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES) 144 145 /** 146 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS) 147 * @size: sizeof(struct iommu_ioas_allow_iovas) 148 * @ioas_id: IOAS ID to allow IOVAs from 149 * @num_iovas: Input/Output total number of ranges in the IOAS 150 * @__reserved: Must be 0 151 * @allowed_iovas: Pointer to array of struct iommu_iova_range 152 * 153 * Ensure a range of IOVAs are always available for allocation. If this call 154 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges 155 * that are narrower than the ranges provided here. This call will fail if 156 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges. 157 * 158 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as 159 * devices are attached the IOVA will narrow based on the device restrictions. 160 * When an allowed range is specified any narrowing will be refused, ie device 161 * attachment can fail if the device requires limiting within the allowed range. 162 * 163 * Automatic IOVA allocation is also impacted by this call. MAP will only 164 * allocate within the allowed IOVAs if they are present. 165 * 166 * This call replaces the entire allowed list with the given list. 
167 */ 168 struct iommu_ioas_allow_iovas { 169 __u32 size; 170 __u32 ioas_id; 171 __u32 num_iovas; 172 __u32 __reserved; 173 __aligned_u64 allowed_iovas; 174 }; 175 #define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS) 176 177 /** 178 * enum iommufd_ioas_map_flags - Flags for map and copy 179 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate 180 * IOVA to place the mapping at 181 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping 182 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping 183 */ 184 enum iommufd_ioas_map_flags { 185 IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0, 186 IOMMU_IOAS_MAP_WRITEABLE = 1 << 1, 187 IOMMU_IOAS_MAP_READABLE = 1 << 2, 188 }; 189 190 /** 191 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP) 192 * @size: sizeof(struct iommu_ioas_map) 193 * @flags: Combination of enum iommufd_ioas_map_flags 194 * @ioas_id: IOAS ID to change the mapping of 195 * @__reserved: Must be 0 196 * @user_va: Userspace pointer to start mapping from 197 * @length: Number of bytes to map 198 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set 199 * then this must be provided as input. 200 * 201 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the 202 * mapping will be established at iova, otherwise a suitable location based on 203 * the reserved and allowed lists will be automatically selected and returned in 204 * iova. 205 * 206 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently 207 * be unused, existing IOVA cannot be replaced. 
208 */ 209 struct iommu_ioas_map { 210 __u32 size; 211 __u32 flags; 212 __u32 ioas_id; 213 __u32 __reserved; 214 __aligned_u64 user_va; 215 __aligned_u64 length; 216 __aligned_u64 iova; 217 }; 218 #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) 219 220 /** 221 * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE) 222 * @size: sizeof(struct iommu_ioas_map_file) 223 * @flags: same as for iommu_ioas_map 224 * @ioas_id: same as for iommu_ioas_map 225 * @fd: the memfd to map 226 * @start: byte offset from start of file to map from 227 * @length: same as for iommu_ioas_map 228 * @iova: same as for iommu_ioas_map 229 * 230 * Set an IOVA mapping from a memfd file. All other arguments and semantics 231 * match those of IOMMU_IOAS_MAP. 232 */ 233 struct iommu_ioas_map_file { 234 __u32 size; 235 __u32 flags; 236 __u32 ioas_id; 237 __s32 fd; 238 __aligned_u64 start; 239 __aligned_u64 length; 240 __aligned_u64 iova; 241 }; 242 #define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE) 243 244 /** 245 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) 246 * @size: sizeof(struct iommu_ioas_copy) 247 * @flags: Combination of enum iommufd_ioas_map_flags 248 * @dst_ioas_id: IOAS ID to change the mapping of 249 * @src_ioas_id: IOAS ID to copy from 250 * @length: Number of bytes to copy and map 251 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is 252 * set then this must be provided as input. 253 * @src_iova: IOVA to start the copy 254 * 255 * Copy an already existing mapping from src_ioas_id and establish it in 256 * dst_ioas_id. The src iova/length must exactly match a range used with 257 * IOMMU_IOAS_MAP. 258 * 259 * This may be used to efficiently clone a subset of an IOAS to another, or as a 260 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over 261 * establishing equivalent new mappings, as internal resources are shared, and 262 * the kernel will pin the user memory only once. 
263 */ 264 struct iommu_ioas_copy { 265 __u32 size; 266 __u32 flags; 267 __u32 dst_ioas_id; 268 __u32 src_ioas_id; 269 __aligned_u64 length; 270 __aligned_u64 dst_iova; 271 __aligned_u64 src_iova; 272 }; 273 #define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY) 274 275 /** 276 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP) 277 * @size: sizeof(struct iommu_ioas_unmap) 278 * @ioas_id: IOAS ID to change the mapping of 279 * @iova: IOVA to start the unmapping at 280 * @length: Number of bytes to unmap, and return back the bytes unmapped 281 * 282 * Unmap an IOVA range. The iova/length must be a superset of a previously 283 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or 284 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap 285 * everything. 286 */ 287 struct iommu_ioas_unmap { 288 __u32 size; 289 __u32 ioas_id; 290 __aligned_u64 iova; 291 __aligned_u64 length; 292 }; 293 #define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP) 294 295 /** 296 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and 297 * ioctl(IOMMU_OPTION_HUGE_PAGES) 298 * @IOMMU_OPTION_RLIMIT_MODE: 299 * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege 300 * to invoke this. Value 0 (default) is user based accouting, 1 uses process 301 * based accounting. Global option, object_id must be 0 302 * @IOMMU_OPTION_HUGE_PAGES: 303 * Value 1 (default) allows contiguous pages to be combined when generating 304 * iommu mappings. Value 0 disables combining, everything is mapped to 305 * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS 306 * option, the object_id must be the IOAS ID. 
307 */ 308 enum iommufd_option { 309 IOMMU_OPTION_RLIMIT_MODE = 0, 310 IOMMU_OPTION_HUGE_PAGES = 1, 311 }; 312 313 /** 314 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and 315 * ioctl(IOMMU_OPTION_OP_GET) 316 * @IOMMU_OPTION_OP_SET: Set the option's value 317 * @IOMMU_OPTION_OP_GET: Get the option's value 318 */ 319 enum iommufd_option_ops { 320 IOMMU_OPTION_OP_SET = 0, 321 IOMMU_OPTION_OP_GET = 1, 322 }; 323 324 /** 325 * struct iommu_option - iommu option multiplexer 326 * @size: sizeof(struct iommu_option) 327 * @option_id: One of enum iommufd_option 328 * @op: One of enum iommufd_option_ops 329 * @__reserved: Must be 0 330 * @object_id: ID of the object if required 331 * @val64: Option value to set or value returned on get 332 * 333 * Change a simple option value. This multiplexor allows controlling options 334 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET 335 * will return the current value. 336 */ 337 struct iommu_option { 338 __u32 size; 339 __u32 option_id; 340 __u16 op; 341 __u16 __reserved; 342 __u32 object_id; 343 __aligned_u64 val64; 344 }; 345 #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) 346 347 /** 348 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls 349 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS 350 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS 351 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility 352 */ 353 enum iommufd_vfio_ioas_op { 354 IOMMU_VFIO_IOAS_GET = 0, 355 IOMMU_VFIO_IOAS_SET = 1, 356 IOMMU_VFIO_IOAS_CLEAR = 2, 357 }; 358 359 /** 360 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS) 361 * @size: sizeof(struct iommu_vfio_ioas) 362 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set 363 * For IOMMU_VFIO_IOAS_GET will output the IOAS ID 364 * @op: One of enum iommufd_vfio_ioas_op 365 * @__reserved: Must be 0 366 * 367 * The VFIO compatibility support uses a single ioas because VFIO APIs do not 368 * support the ID field. 
Set or Get the IOAS that VFIO compatibility will use.
 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
 * compatibility ioas, either by taking what is already set, or auto creating
 * one. From then on VFIO will continue to use that ioas and is not affected by
 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
 */
struct iommu_vfio_ioas {
	__u32 size;
	__u32 ioas_id;
	__u16 op;
	__u16 __reserved;
};
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)

/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
 *                          domain can be attached to any PASID on the device.
 *                          Any domain attached to the non-PASID part of the
 *                          device must also be flagged, otherwise attaching a
 *                          PASID will be blocked.
 *                          If IOMMU does not support PASID it will return
 *                          error (-EOPNOTSUPP).
397 */ 398 enum iommufd_hwpt_alloc_flags { 399 IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, 400 IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, 401 IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, 402 IOMMU_HWPT_ALLOC_PASID = 1 << 3, 403 }; 404 405 /** 406 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table 407 * entry attributes 408 * @IOMMU_VTD_S1_SRE: Supervisor request 409 * @IOMMU_VTD_S1_EAFE: Extended access enable 410 * @IOMMU_VTD_S1_WPE: Write protect enable 411 */ 412 enum iommu_hwpt_vtd_s1_flags { 413 IOMMU_VTD_S1_SRE = 1 << 0, 414 IOMMU_VTD_S1_EAFE = 1 << 1, 415 IOMMU_VTD_S1_WPE = 1 << 2, 416 }; 417 418 /** 419 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table 420 * info (IOMMU_HWPT_DATA_VTD_S1) 421 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags 422 * @pgtbl_addr: The base address of the stage-1 page table. 423 * @addr_width: The address width of the stage-1 page table 424 * @__reserved: Must be 0 425 */ 426 struct iommu_hwpt_vtd_s1 { 427 __aligned_u64 flags; 428 __aligned_u64 pgtbl_addr; 429 __u32 addr_width; 430 __u32 __reserved; 431 }; 432 433 /** 434 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE 435 * (IOMMU_HWPT_DATA_ARM_SMMUV3) 436 * 437 * @ste: The first two double words of the user space Stream Table Entry for 438 * the translation. Must be little-endian. 439 * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) 440 * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax 441 * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD 442 * 443 * -EIO will be returned if @ste is not legal or contains any non-allowed field. 444 * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass 445 * nested domain will translate the same as the nesting parent. The S1 will 446 * install a Context Descriptor Table pointing at userspace memory translated 447 * by the nesting parent. 
448 */ 449 struct iommu_hwpt_arm_smmuv3 { 450 __aligned_le64 ste[2]; 451 }; 452 453 /** 454 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type 455 * @IOMMU_HWPT_DATA_NONE: no data 456 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table 457 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table 458 */ 459 enum iommu_hwpt_data_type { 460 IOMMU_HWPT_DATA_NONE = 0, 461 IOMMU_HWPT_DATA_VTD_S1 = 1, 462 IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, 463 }; 464 465 /** 466 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) 467 * @size: sizeof(struct iommu_hwpt_alloc) 468 * @flags: Combination of enum iommufd_hwpt_alloc_flags 469 * @dev_id: The device to allocate this HWPT for 470 * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to 471 * @out_hwpt_id: The ID of the new HWPT 472 * @__reserved: Must be 0 473 * @data_type: One of enum iommu_hwpt_data_type 474 * @data_len: Length of the type specific data 475 * @data_uptr: User pointer to the type specific data 476 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of 477 * IOMMU_HWPT_FAULT_ID_VALID is set. 478 * @__reserved2: Padding to 64-bit alignment. Must be 0. 479 * 480 * Explicitly allocate a hardware page table object. This is the same object 481 * type that is returned by iommufd_device_attach() and represents the 482 * underlying iommu driver's iommu_domain kernel object. 483 * 484 * A kernel-managed HWPT will be created with the mappings from the given 485 * IOAS via the @pt_id. The @data_type for this allocation must be set to 486 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a 487 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. 488 * 489 * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a 490 * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be 491 * allocated previously via the same ioctl from a given IOAS (@pt_id). 
In this 492 * case, the @data_type must be set to a pre-defined type corresponding to an 493 * I/O page table type supported by the underlying IOMMU hardware. The device 494 * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU 495 * instance. 496 * 497 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and 498 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr 499 * must be given. 500 */ 501 struct iommu_hwpt_alloc { 502 __u32 size; 503 __u32 flags; 504 __u32 dev_id; 505 __u32 pt_id; 506 __u32 out_hwpt_id; 507 __u32 __reserved; 508 __u32 data_type; 509 __u32 data_len; 510 __aligned_u64 data_uptr; 511 __u32 fault_id; 512 __u32 __reserved2; 513 }; 514 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) 515 516 /** 517 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info 518 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings 519 * on a nested_parent domain. 520 * https://www.intel.com/content/www/us/en/content-details/772415/content-details.html 521 */ 522 enum iommu_hw_info_vtd_flags { 523 IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0, 524 }; 525 526 /** 527 * struct iommu_hw_info_vtd - Intel VT-d hardware information 528 * 529 * @flags: Combination of enum iommu_hw_info_vtd_flags 530 * @__reserved: Must be 0 531 * 532 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec 533 * section 11.4.2 Capability Register. 534 * @ecap_reg: Value of Intel VT-d capability register defined in VT-d spec 535 * section 11.4.3 Extended Capability Register. 536 * 537 * User needs to understand the Intel VT-d specification to decode the 538 * register value. 
539 */ 540 struct iommu_hw_info_vtd { 541 __u32 flags; 542 __u32 __reserved; 543 __aligned_u64 cap_reg; 544 __aligned_u64 ecap_reg; 545 }; 546 547 /** 548 * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information 549 * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3) 550 * 551 * @flags: Must be set to 0 552 * @__reserved: Must be 0 553 * @idr: Implemented features for ARM SMMU Non-secure programming interface 554 * @iidr: Information about the implementation and implementer of ARM SMMU, 555 * and architecture version supported 556 * @aidr: ARM SMMU architecture version 557 * 558 * For the details of @idr, @iidr and @aidr, please refer to the chapters 559 * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. 560 * 561 * User space should read the underlying ARM SMMUv3 hardware information for 562 * the list of supported features. 563 * 564 * Note that these values reflect the raw HW capability, without any insight if 565 * any required kernel driver support is present. Bits may be set indicating the 566 * HW has functionality that is lacking kernel software support, such as BTM. If 567 * a VMM is using this information to construct emulated copies of these 568 * registers it should only forward bits that it knows it can support. 569 * 570 * In future, presence of required kernel support will be indicated in flags. 
571 */ 572 struct iommu_hw_info_arm_smmuv3 { 573 __u32 flags; 574 __u32 __reserved; 575 __u32 idr[6]; 576 __u32 iidr; 577 __u32 aidr; 578 }; 579 580 /** 581 * enum iommu_hw_info_type - IOMMU Hardware Info Types 582 * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware 583 * info 584 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type 585 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type 586 */ 587 enum iommu_hw_info_type { 588 IOMMU_HW_INFO_TYPE_NONE = 0, 589 IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, 590 IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, 591 }; 592 593 /** 594 * enum iommufd_hw_capabilities 595 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking 596 * If available, it means the following APIs 597 * are supported: 598 * 599 * IOMMU_HWPT_GET_DIRTY_BITMAP 600 * IOMMU_HWPT_SET_DIRTY_TRACKING 601 * 602 */ 603 enum iommufd_hw_capabilities { 604 IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, 605 }; 606 607 /** 608 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) 609 * @size: sizeof(struct iommu_hw_info) 610 * @flags: Must be 0 611 * @dev_id: The device bound to the iommufd 612 * @data_len: Input the length of a user buffer in bytes. Output the length of 613 * data that kernel supports 614 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill 615 * the iommu type specific hardware information data 616 * @out_data_type: Output the iommu hardware info type as defined in the enum 617 * iommu_hw_info_type. 618 * @out_capabilities: Output the generic iommu capability info type as defined 619 * in the enum iommu_hw_capabilities. 620 * @__reserved: Must be 0 621 * 622 * Query an iommu type specific hardware information data from an iommu behind 623 * a given device that has been bound to iommufd. This hardware info data will 624 * be used to sync capabilities between the virtual iommu and the physical 625 * iommu, e.g. 
a nested translation setup needs to check the hardware info, so 626 * a guest stage-1 page table can be compatible with the physical iommu. 627 * 628 * To capture an iommu type specific hardware information data, @data_uptr and 629 * its length @data_len must be provided. Trailing bytes will be zeroed if the 630 * user buffer is larger than the data that kernel has. Otherwise, kernel only 631 * fills the buffer using the given length in @data_len. If the ioctl succeeds, 632 * @data_len will be updated to the length that kernel actually supports, 633 * @out_data_type will be filled to decode the data filled in the buffer 634 * pointed by @data_uptr. Input @data_len == zero is allowed. 635 */ 636 struct iommu_hw_info { 637 __u32 size; 638 __u32 flags; 639 __u32 dev_id; 640 __u32 data_len; 641 __aligned_u64 data_uptr; 642 __u32 out_data_type; 643 __u32 __reserved; 644 __aligned_u64 out_capabilities; 645 }; 646 #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) 647 648 /* 649 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty 650 * tracking 651 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking 652 */ 653 enum iommufd_hwpt_set_dirty_tracking_flags { 654 IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1, 655 }; 656 657 /** 658 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING) 659 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking) 660 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags 661 * @hwpt_id: HW pagetable ID that represents the IOMMU domain 662 * @__reserved: Must be 0 663 * 664 * Toggle dirty tracking on an HW pagetable. 
665 */ 666 struct iommu_hwpt_set_dirty_tracking { 667 __u32 size; 668 __u32 flags; 669 __u32 hwpt_id; 670 __u32 __reserved; 671 }; 672 #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ 673 IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) 674 675 /** 676 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits 677 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing 678 * any dirty bits metadata. This flag 679 * can be passed in the expectation 680 * where the next operation is an unmap 681 * of the same IOVA range. 682 * 683 */ 684 enum iommufd_hwpt_get_dirty_bitmap_flags { 685 IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1, 686 }; 687 688 /** 689 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) 690 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) 691 * @hwpt_id: HW pagetable ID that represents the IOMMU domain 692 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags 693 * @__reserved: Must be 0 694 * @iova: base IOVA of the bitmap first bit 695 * @length: IOVA range size 696 * @page_size: page size granularity of each bit in the bitmap 697 * @data: bitmap where to set the dirty bits. The bitmap bits each 698 * represent a page_size which you deviate from an arbitrary iova. 699 * 700 * Checking a given IOVA is dirty: 701 * 702 * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) 703 * 704 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap 705 * with the dirty IOVAs. In doing so it will also by default clear any 706 * dirty bit metadata set in the IOPTE. 
707 */ 708 struct iommu_hwpt_get_dirty_bitmap { 709 __u32 size; 710 __u32 hwpt_id; 711 __u32 flags; 712 __u32 __reserved; 713 __aligned_u64 iova; 714 __aligned_u64 length; 715 __aligned_u64 page_size; 716 __aligned_u64 data; 717 }; 718 #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ 719 IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) 720 721 /** 722 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation 723 * Data Type 724 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 725 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 726 */ 727 enum iommu_hwpt_invalidate_data_type { 728 IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, 729 IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1, 730 }; 731 732 /** 733 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d 734 * stage-1 cache invalidation 735 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies 736 * to all-levels page structure cache or just 737 * the leaf PTE cache. 738 */ 739 enum iommu_hwpt_vtd_s1_invalidate_flags { 740 IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0, 741 }; 742 743 /** 744 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation 745 * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) 746 * @addr: The start address of the range to be invalidated. It needs to 747 * be 4KB aligned. 748 * @npages: Number of contiguous 4K pages to be invalidated. 749 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags 750 * @__reserved: Must be 0 751 * 752 * The Intel VT-d specific invalidation data for user-managed stage-1 cache 753 * invalidation in nested translation. Userspace uses this structure to 754 * tell the impacted cache scope after modifying the stage-1 page table. 755 * 756 * Invalidating all the caches related to the page table by setting @addr 757 * to be 0 and @npages to be U64_MAX. 758 * 759 * The device TLB will be invalidated automatically if ATS is enabled. 
760 */ 761 struct iommu_hwpt_vtd_s1_invalidate { 762 __aligned_u64 addr; 763 __aligned_u64 npages; 764 __u32 flags; 765 __u32 __reserved; 766 }; 767 768 /** 769 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation 770 * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) 771 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ. 772 * Must be little-endian. 773 * 774 * Supported command list only when passing in a vIOMMU via @hwpt_id: 775 * CMDQ_OP_TLBI_NSNH_ALL 776 * CMDQ_OP_TLBI_NH_VA 777 * CMDQ_OP_TLBI_NH_VAA 778 * CMDQ_OP_TLBI_NH_ALL 779 * CMDQ_OP_TLBI_NH_ASID 780 * CMDQ_OP_ATC_INV 781 * CMDQ_OP_CFGI_CD 782 * CMDQ_OP_CFGI_CD_ALL 783 * 784 * -EIO will be returned if the command is not supported. 785 */ 786 struct iommu_viommu_arm_smmuv3_invalidate { 787 __aligned_le64 cmd[2]; 788 }; 789 790 /** 791 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) 792 * @size: sizeof(struct iommu_hwpt_invalidate) 793 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation 794 * @data_uptr: User pointer to an array of driver-specific cache invalidation 795 * data. 796 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data 797 * type of all the entries in the invalidation request array. It 798 * should be a type supported by the hwpt pointed by @hwpt_id. 799 * @entry_len: Length (in bytes) of a request entry in the request array 800 * @entry_num: Input the number of cache invalidation requests in the array. 801 * Output the number of requests successfully handled by kernel. 802 * @__reserved: Must be 0. 803 * 804 * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications 805 * on a user-managed page table should be followed by this operation, if a HWPT 806 * is passed in via @hwpt_id. Other caches, such as device cache or descriptor 807 * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field. 
808 * 809 * Each ioctl can support one or more cache invalidation requests in the array 810 * that has a total size of @entry_len * @entry_num. 811 * 812 * An empty invalidation request array by setting @entry_num==0 is allowed, and 813 * @entry_len and @data_uptr would be ignored in this case. This can be used to 814 * check if the given @data_type is supported or not by kernel. 815 */ 816 struct iommu_hwpt_invalidate { 817 __u32 size; 818 __u32 hwpt_id; 819 __aligned_u64 data_uptr; 820 __u32 data_type; 821 __u32 entry_len; 822 __u32 entry_num; 823 __u32 __reserved; 824 }; 825 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) 826 827 /** 828 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault 829 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is 830 * valid. 831 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group. 832 */ 833 enum iommu_hwpt_pgfault_flags { 834 IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), 835 IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), 836 }; 837 838 /** 839 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault 840 * @IOMMU_PGFAULT_PERM_READ: request for read permission 841 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission 842 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the 843 * Execute Requested bit set in PASID TLP Prefix. 844 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the 845 * Privileged Mode Requested bit set in PASID TLP 846 * Prefix. 
847 */ 848 enum iommu_hwpt_pgfault_perm { 849 IOMMU_PGFAULT_PERM_READ = (1 << 0), 850 IOMMU_PGFAULT_PERM_WRITE = (1 << 1), 851 IOMMU_PGFAULT_PERM_EXEC = (1 << 2), 852 IOMMU_PGFAULT_PERM_PRIV = (1 << 3), 853 }; 854 855 /** 856 * struct iommu_hwpt_pgfault - iommu page fault data 857 * @flags: Combination of enum iommu_hwpt_pgfault_flags 858 * @dev_id: id of the originated device 859 * @pasid: Process Address Space ID 860 * @grpid: Page Request Group Index 861 * @perm: Combination of enum iommu_hwpt_pgfault_perm 862 * @addr: Fault address 863 * @length: a hint of how much data the requestor is expecting to fetch. For 864 * example, if the PRI initiator knows it is going to do a 10MB 865 * transfer, it could fill in 10MB and the OS could pre-fault in 866 * 10MB of IOVA. It's default to 0 if there's no such hint. 867 * @cookie: kernel-managed cookie identifying a group of fault messages. The 868 * cookie number encoded in the last page fault of the group should 869 * be echoed back in the response message. 870 */ 871 struct iommu_hwpt_pgfault { 872 __u32 flags; 873 __u32 dev_id; 874 __u32 pasid; 875 __u32 grpid; 876 __u32 perm; 877 __u64 addr; 878 __u32 length; 879 __u32 cookie; 880 }; 881 882 /** 883 * enum iommufd_page_response_code - Return status of fault handlers 884 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables 885 * populated, retry the access. This is the 886 * "Success" defined in PCI 10.4.2.1. 887 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the 888 * access. This is the "Invalid Request" in PCI 889 * 10.4.2.1. 890 */ 891 enum iommufd_page_response_code { 892 IOMMUFD_PAGE_RESP_SUCCESS = 0, 893 IOMMUFD_PAGE_RESP_INVALID = 1, 894 }; 895 896 /** 897 * struct iommu_hwpt_page_response - IOMMU page fault response 898 * @cookie: The kernel-managed cookie reported in the fault message. 899 * @code: One of response code in enum iommufd_page_response_code. 
900 */ 901 struct iommu_hwpt_page_response { 902 __u32 cookie; 903 __u32 code; 904 }; 905 906 /** 907 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) 908 * @size: sizeof(struct iommu_fault_alloc) 909 * @flags: Must be 0 910 * @out_fault_id: The ID of the new FAULT 911 * @out_fault_fd: The fd of the new FAULT 912 * 913 * Explicitly allocate a fault handling object. 914 */ 915 struct iommu_fault_alloc { 916 __u32 size; 917 __u32 flags; 918 __u32 out_fault_id; 919 __u32 out_fault_fd; 920 }; 921 #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) 922 923 /** 924 * enum iommu_viommu_type - Virtual IOMMU Type 925 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use 926 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type 927 */ 928 enum iommu_viommu_type { 929 IOMMU_VIOMMU_TYPE_DEFAULT = 0, 930 IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, 931 }; 932 933 /** 934 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) 935 * @size: sizeof(struct iommu_viommu_alloc) 936 * @flags: Must be 0 937 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type 938 * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU 939 * @hwpt_id: ID of a nesting parent HWPT to associate to 940 * @out_viommu_id: Output virtual IOMMU ID for the allocated object 941 * 942 * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's 943 * virtualization support that is a security-isolated slice of the real IOMMU HW 944 * that is unique to a specific VM. Operations global to the IOMMU are connected 945 * to the vIOMMU, such as: 946 * - Security namespace for guest owned ID, e.g. guest-controlled cache tags 947 * - Non-device-affiliated event reporting, e.g. invalidation queue errors 948 * - Access to a sharable nesting parent pagetable across physical IOMMUs 949 * - Virtualization of various platforms IDs, e.g. 
RIDs and others 950 * - Delivery of paravirtualized invalidation 951 * - Direct assigned invalidation queues 952 * - Direct assigned interrupts 953 */ 954 struct iommu_viommu_alloc { 955 __u32 size; 956 __u32 flags; 957 __u32 type; 958 __u32 dev_id; 959 __u32 hwpt_id; 960 __u32 out_viommu_id; 961 }; 962 #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) 963 964 /** 965 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) 966 * @size: sizeof(struct iommu_vdevice_alloc) 967 * @viommu_id: vIOMMU ID to associate with the virtual device 968 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU 969 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY 970 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID 971 * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table 972 * 973 * Allocate a virtual device instance (for a physical device) against a vIOMMU. 974 * This instance holds the device's information (related to its vIOMMU) in a VM. 975 */ 976 struct iommu_vdevice_alloc { 977 __u32 size; 978 __u32 viommu_id; 979 __u32 dev_id; 980 __u32 out_vdevice_id; 981 __aligned_u64 virt_id; 982 }; 983 #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) 984 985 /** 986 * struct iommu_ioas_change_process - ioctl(VFIO_IOAS_CHANGE_PROCESS) 987 * @size: sizeof(struct iommu_ioas_change_process) 988 * @__reserved: Must be 0 989 * 990 * This transfers pinned memory counts for every memory map in every IOAS 991 * in the context to the current process. This only supports maps created 992 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present. 993 * If the ioctl returns a failure status, then nothing is changed. 994 * 995 * This API is useful for transferring operation of a device from one process 996 * to another, such as during userland live update. 
997 */ 998 struct iommu_ioas_change_process { 999 __u32 size; 1000 __u32 __reserved; 1001 }; 1002 1003 #define IOMMU_IOAS_CHANGE_PROCESS \ 1004 _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) 1005 1006 #endif 1007