/* SPDX-License-Identifier: MIT */
/*
 * Copyright (C) The Asahi Linux Contributors
 * Copyright (C) 2018-2023 Collabora Ltd.
 * Copyright (C) 2014-2018 Broadcom
 */
#ifndef _ASAHI_DRM_H_
#define _ASAHI_DRM_H_

#include "drm.h"

#if defined(__cplusplus)
extern "C" {
#endif

/**
 * DOC: Introduction to the Asahi UAPI
 *
 * This documentation describes the Asahi IOCTLs.
 *
 * Just a few generic rules about the data passed to the Asahi IOCTLs (cribbed
 * from Panthor):
 *
 * - Structures must be aligned on 64-bit/8-byte. If the object is not
 *   naturally aligned, a padding field must be added.
 * - Fields must be explicitly aligned to their natural type alignment with
 *   pad[0..N] fields.
 * - All padding fields will be checked by the driver to make sure they are
 *   zeroed.
 * - Flags can be added, but not removed/replaced.
 * - New fields can be added to the main structures (the structures
 *   directly passed to the ioctl). Those fields can be added at the end of
 *   the structure, or replace existing padding fields. Any new field being
 *   added must preserve the behavior that existed before those fields were
 *   added when a value of zero is passed.
 * - New fields can be added to indirect objects (objects pointed by the
 *   main structure), iff those objects are passed a size to reflect the
 *   size known by the userspace driver (see
 *   drm_asahi_cmd_header::size).
 * - If the kernel driver is too old to know some fields, those will be
 *   ignored if zero, and otherwise rejected (and so will be zero on output).
 * - If userspace is too old to know some fields, those will be zeroed
 *   (input) before the structure is parsed by the kernel driver.
 * - Each new flag/field addition must come with a driver version update so
 *   the userspace driver doesn't have to guess which flags are supported.
 * - Structures should not contain unions, as this would defeat the
 *   extensibility of such structures.
 * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed
 *   at the end of the drm_asahi_ioctl_id enum.
 */

/**
 * enum drm_asahi_ioctl_id - IOCTL IDs
 *
 * Place new ioctls at the end, don't re-order, don't replace or remove entries.
 *
 * These IDs are not meant to be used directly. Use the DRM_IOCTL_ASAHI_xxx
 * definitions instead.
 */
enum drm_asahi_ioctl_id {
        /** @DRM_ASAHI_GET_PARAMS: Query device properties. */
        DRM_ASAHI_GET_PARAMS = 0,

        /** @DRM_ASAHI_GET_TIME: Query device time. */
        DRM_ASAHI_GET_TIME,

        /** @DRM_ASAHI_VM_CREATE: Create a GPU VM address space. */
        DRM_ASAHI_VM_CREATE,

        /** @DRM_ASAHI_VM_DESTROY: Destroy a VM. */
        DRM_ASAHI_VM_DESTROY,

        /** @DRM_ASAHI_VM_BIND: Bind/unbind memory to a VM. */
        DRM_ASAHI_VM_BIND,

        /** @DRM_ASAHI_GEM_CREATE: Create a buffer object. */
        DRM_ASAHI_GEM_CREATE,

        /**
         * @DRM_ASAHI_GEM_MMAP_OFFSET: Get offset to pass to mmap() to map a
         * given GEM handle.
         */
        DRM_ASAHI_GEM_MMAP_OFFSET,

        /** @DRM_ASAHI_GEM_BIND_OBJECT: Bind memory as a special object */
        DRM_ASAHI_GEM_BIND_OBJECT,

        /** @DRM_ASAHI_QUEUE_CREATE: Create a scheduling queue. */
        DRM_ASAHI_QUEUE_CREATE,

        /** @DRM_ASAHI_QUEUE_DESTROY: Destroy a scheduling queue. */
        DRM_ASAHI_QUEUE_DESTROY,

        /** @DRM_ASAHI_SUBMIT: Submit commands to a queue. */
        DRM_ASAHI_SUBMIT,
};

#define DRM_ASAHI_MAX_CLUSTERS 64

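/*
 * Example (illustrative only, not part of the UAPI): because the driver
 * checks that all padding fields are zeroed, userspace should
 * zero-initialize argument structs rather than assign members one by one.
 * A minimal sketch, assuming a DRM fd obtained by the caller:
 *
 *      struct drm_asahi_vm_destroy arg = {
 *              .vm_id = vm_id,         // pad is implicitly zeroed
 *      };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_VM_DESTROY, &arg))
 *              return -errno;
 */
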
/**
 * struct drm_asahi_params_global - Global parameters.
 *
 * This struct may be queried by drm_asahi_get_params.
 */
struct drm_asahi_params_global {
        /** @features: Feature bits from drm_asahi_feature */
        __u64 features;

        /** @gpu_generation: GPU generation, e.g. 13 for G13G */
        __u32 gpu_generation;

        /** @gpu_variant: GPU variant as a character, e.g. 'C' for G13C */
        __u32 gpu_variant;

        /**
         * @gpu_revision: GPU revision in BCD, e.g. 0x00 for 'A0' or
         * 0x21 for 'C1'
         */
        __u32 gpu_revision;

        /** @chip_id: Chip ID in BCD, e.g. 0x8103 for T8103 */
        __u32 chip_id;

        /** @num_dies: Number of dies in the SoC */
        __u32 num_dies;

        /** @num_clusters_total: Number of GPU clusters (across all dies) */
        __u32 num_clusters_total;

        /**
         * @num_cores_per_cluster: Number of logical cores per cluster
         * (including inactive/nonexistent)
         */
        __u32 num_cores_per_cluster;

        /** @max_frequency_khz: Maximum GPU core clock frequency */
        __u32 max_frequency_khz;

        /** @core_masks: Bitmask of present/enabled cores per cluster */
        __u64 core_masks[DRM_ASAHI_MAX_CLUSTERS];

        /**
         * @vm_start: VM range start VMA. Together with @vm_end, this defines
         * the window of valid GPU VAs. Userspace is expected to subdivide VAs
         * out of this window.
         *
         * This window contains all virtual addresses that userspace needs to
         * know about. There may be kernel-internal GPU VAs outside this range,
         * but that detail is not relevant here.
         */
        __u64 vm_start;

        /** @vm_end: VM range end VMA */
        __u64 vm_end;

        /**
         * @vm_kernel_min_size: Minimum kernel VMA window size.
         *
         * When creating a VM, userspace is required to carve out a section of
         * virtual addresses (within the range given by @vm_start and
         * @vm_end). The kernel will allocate various internal structures
         * within the specified VA range.
         *
         * Allowing userspace to choose the VA range for the kernel, rather than
         * the kernel reserving VAs and requiring userspace to cope, can assist
         * in implementing SVM.
         */
        __u64 vm_kernel_min_size;

        /**
         * @max_commands_per_submission: Maximum number of supported commands
         * per submission. This mirrors firmware limits. Userspace must split up
         * larger command buffers, which may require inserting additional
         * synchronization.
         */
        __u32 max_commands_per_submission;

        /**
         * @max_attachments: Maximum number of drm_asahi_attachment's per
         * command
         */
        __u32 max_attachments;

        /**
         * @command_timestamp_frequency_hz: Timebase frequency for timestamps
         * written during command execution, specified via drm_asahi_timestamp
         * structures. As this rate is controlled by the firmware, it is a
         * queryable parameter.
         *
         * Userspace must divide by this frequency to convert timestamps to
         * seconds, rather than hardcoding a particular firmware's rate.
         */
        __u64 command_timestamp_frequency_hz;
};

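/*
 * Example (illustrative only): querying the global parameters and counting
 * the enabled GPU cores. Passing the buffer size lets older userspace run
 * against newer kernels and vice versa; struct drm_asahi_get_params is
 * defined below.
 *
 *      struct drm_asahi_params_global params = { 0 };
 *      struct drm_asahi_get_params get = {
 *              .pointer = (__u64)(uintptr_t)&params,
 *              .size = sizeof(params),
 *      };
 *      __u32 i, cores = 0;
 *
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_GET_PARAMS, &get))
 *              return -errno;
 *      for (i = 0; i < params.num_clusters_total; i++)
 *              cores += __builtin_popcountll(params.core_masks[i]);
 */
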
/**
 * enum drm_asahi_feature - Feature bits
 *
 * This covers only features that userspace cannot infer from the architecture
 * version. Most features don't need to be here.
 */
enum drm_asahi_feature {
        /**
         * @DRM_ASAHI_FEATURE_SOFT_FAULTS: GPU has "soft fault" enabled. Shader
         * loads of unmapped memory will return zero. Shader stores to unmapped
         * memory will be silently discarded. Note that only shader load/store
         * is affected. Other hardware units are not affected, notably including
         * texture sampling.
         *
         * Soft fault is set when initializing the GPU and cannot be runtime
         * toggled. Therefore, it is exposed as a feature bit and not a
         * userspace-settable flag on the VM. When soft fault is enabled,
         * userspace can speculate memory accesses more aggressively.
         */
        DRM_ASAHI_FEATURE_SOFT_FAULTS = (1UL) << 0,
};

/**
 * struct drm_asahi_get_params - Arguments passed to DRM_IOCTL_ASAHI_GET_PARAMS
 */
struct drm_asahi_get_params {
        /** @param_group: Parameter group to fetch (MBZ) */
        __u32 param_group;

        /** @pad: MBZ */
        __u32 pad;

        /** @pointer: User pointer to write parameter struct */
        __u64 pointer;

        /**
         * @size: Size of the user buffer. In case of older userspace, this may
         * be less than sizeof(struct drm_asahi_params_global). The kernel will
         * not write past the length specified here, allowing extensibility.
         */
        __u64 size;
};

/**
 * struct drm_asahi_vm_create - Arguments passed to DRM_IOCTL_ASAHI_VM_CREATE
 */
struct drm_asahi_vm_create {
        /**
         * @kernel_start: Start of the kernel-reserved address range. See
         * drm_asahi_params_global::vm_kernel_min_size.
         *
         * Both @kernel_start and @kernel_end must be within the range of
         * valid VAs given by drm_asahi_params_global::vm_start and
         * drm_asahi_params_global::vm_end. The size of the kernel range
         * (@kernel_end - @kernel_start) must be at least
         * drm_asahi_params_global::vm_kernel_min_size.
         *
         * Userspace must not bind any memory on this VM into this reserved
         * range; it is for kernel use only.
         */
        __u64 kernel_start;

        /**
         * @kernel_end: End of the kernel-reserved address range. See
         * @kernel_start.
         */
        __u64 kernel_end;

        /** @vm_id: Returned VM ID */
        __u32 vm_id;

        /** @pad: MBZ */
        __u32 pad;
};

/**
 * struct drm_asahi_vm_destroy - Arguments passed to DRM_IOCTL_ASAHI_VM_DESTROY
 */
struct drm_asahi_vm_destroy {
        /** @vm_id: VM ID to be destroyed */
        __u32 vm_id;

        /** @pad: MBZ */
        __u32 pad;
};

/**
 * enum drm_asahi_gem_flags - Flags for GEM creation
 */
enum drm_asahi_gem_flags {
        /**
         * @DRM_ASAHI_GEM_WRITEBACK: BO should be CPU-mapped as writeback.
         *
         * Map as writeback instead of write-combine. This optimizes for CPU
         * reads.
         */
        DRM_ASAHI_GEM_WRITEBACK = (1L << 0),

        /**
         * @DRM_ASAHI_GEM_VM_PRIVATE: BO is private to this GPU VM (no exports).
         */
        DRM_ASAHI_GEM_VM_PRIVATE = (1L << 1),
};

/**
 * struct drm_asahi_gem_create - Arguments passed to DRM_IOCTL_ASAHI_GEM_CREATE
 */
struct drm_asahi_gem_create {
        /** @size: Size of the BO */
        __u64 size;

        /** @flags: Combination of drm_asahi_gem_flags flags. */
        __u32 flags;

        /**
         * @vm_id: VM ID to assign to the BO, if DRM_ASAHI_GEM_VM_PRIVATE is set
         */
        __u32 vm_id;

        /** @handle: Returned GEM handle for the BO */
        __u32 handle;

        /** @pad: MBZ */
        __u32 pad;
};

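/*
 * Example (illustrative only): creating a BO private to a previously
 * created VM. The returned handle can then be mapped for CPU access via
 * DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET (defined below) and bound into the VM
 * with DRM_IOCTL_ASAHI_VM_BIND.
 *
 *      struct drm_asahi_gem_create gem = {
 *              .size = 65536,
 *              .flags = DRM_ASAHI_GEM_VM_PRIVATE,
 *              .vm_id = vm_id,
 *      };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_GEM_CREATE, &gem))
 *              return -errno;
 *      // gem.handle now holds the GEM handle
 */
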
/**
 * struct drm_asahi_gem_mmap_offset - Arguments passed to
 * DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET
 */
struct drm_asahi_gem_mmap_offset {
        /** @handle: Handle for the object being mapped. */
        __u32 handle;

        /** @flags: Must be zero */
        __u32 flags;

        /** @offset: The fake offset to use for subsequent mmap call */
        __u64 offset;
};

/**
 * enum drm_asahi_bind_flags - Flags for GEM binding
 */
enum drm_asahi_bind_flags {
        /**
         * @DRM_ASAHI_BIND_UNBIND: Instead of binding a GEM object to the range,
         * simply unbind the GPU VMA range.
         */
        DRM_ASAHI_BIND_UNBIND = (1L << 0),

        /** @DRM_ASAHI_BIND_READ: Map BO with GPU read permission */
        DRM_ASAHI_BIND_READ = (1L << 1),

        /** @DRM_ASAHI_BIND_WRITE: Map BO with GPU write permission */
        DRM_ASAHI_BIND_WRITE = (1L << 2),

        /**
         * @DRM_ASAHI_BIND_SINGLE_PAGE: Map a single page of the BO repeatedly
         * across the VA range.
         *
         * This is useful to fill a VA range with scratch pages or zero pages.
         * It is intended as a mechanism to accelerate sparse resource binding.
         */
        DRM_ASAHI_BIND_SINGLE_PAGE = (1L << 3),
};

/**
 * struct drm_asahi_gem_bind_op - Description of a single GEM bind operation.
 */
struct drm_asahi_gem_bind_op {
        /** @flags: Combination of drm_asahi_bind_flags flags. */
        __u32 flags;

        /** @handle: GEM object to bind (except for UNBIND) */
        __u32 handle;

        /**
         * @offset: Offset into the object (except for UNBIND).
         *
         * For a regular bind, this is the beginning of the region of the GEM
         * object to bind.
         *
         * For a single-page bind, this is the offset to the single page that
         * will be repeatedly bound.
         *
         * Must be page-size aligned.
         */
        __u64 offset;

        /**
         * @range: Number of bytes to bind/unbind at @addr.
         *
         * Must be page-size aligned.
         */
        __u64 range;

        /**
         * @addr: Address to bind to.
         *
         * Must be page-size aligned.
         */
        __u64 addr;
};

/**
 * struct drm_asahi_vm_bind - Arguments passed to
 * DRM_IOCTL_ASAHI_VM_BIND
 */
struct drm_asahi_vm_bind {
        /** @vm_id: The ID of the VM to bind to */
        __u32 vm_id;

        /** @num_binds: Number of binds in this IOCTL. */
        __u32 num_binds;

        /**
         * @stride: Stride in bytes between consecutive binds. This allows
         * extensibility of drm_asahi_gem_bind_op.
         */
        __u32 stride;

        /** @pad: MBZ */
        __u32 pad;

        /**
         * @userptr: User pointer to an array of @num_binds structures of type
         * @drm_asahi_gem_bind_op, each of size @stride bytes.
         */
        __u64 userptr;
};

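/*
 * Example (illustrative only): binding one BO for GPU read/write with a
 * single DRM_IOCTL_ASAHI_VM_BIND call. @stride lets the array element grow
 * in future versions; current userspace simply passes
 * sizeof(struct drm_asahi_gem_bind_op).
 *
 *      struct drm_asahi_gem_bind_op op = {
 *              .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE,
 *              .handle = gem_handle,
 *              .offset = 0,
 *              .range = size,          // page-size aligned
 *              .addr = gpu_va,         // page-size aligned, within the VM's
 *                                      // valid VA window
 *      };
 *      struct drm_asahi_vm_bind bind = {
 *              .vm_id = vm_id,
 *              .num_binds = 1,
 *              .stride = sizeof(op),
 *              .userptr = (__u64)(uintptr_t)&op,
 *      };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_VM_BIND, &bind))
 *              return -errno;
 */
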
/**
 * enum drm_asahi_bind_object_op - Special object bind operation
 */
enum drm_asahi_bind_object_op {
        /** @DRM_ASAHI_BIND_OBJECT_OP_BIND: Bind a BO as a special GPU object */
        DRM_ASAHI_BIND_OBJECT_OP_BIND = 0,

        /** @DRM_ASAHI_BIND_OBJECT_OP_UNBIND: Unbind a special GPU object */
        DRM_ASAHI_BIND_OBJECT_OP_UNBIND = 1,
};

/**
 * enum drm_asahi_bind_object_flags - Special object bind flags
 */
enum drm_asahi_bind_object_flags {
        /**
         * @DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS: Map a BO as a timestamp
         * buffer.
         */
        DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS = (1L << 0),
};

/**
 * struct drm_asahi_gem_bind_object - Arguments passed to
 * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT
 */
struct drm_asahi_gem_bind_object {
        /** @op: Bind operation (enum drm_asahi_bind_object_op) */
        __u32 op;

        /** @flags: Combination of drm_asahi_bind_object_flags flags. */
        __u32 flags;

        /** @handle: GEM object to bind/unbind (BIND) */
        __u32 handle;

        /** @vm_id: The ID of the VM to operate on (MBZ currently) */
        __u32 vm_id;

        /** @offset: Offset into the object (BIND only) */
        __u64 offset;

        /** @range: Number of bytes to bind/unbind (BIND only) */
        __u64 range;

        /** @object_handle: Object handle (out for BIND, in for UNBIND) */
        __u32 object_handle;

        /** @pad: MBZ */
        __u32 pad;
};

/**
 * enum drm_asahi_cmd_type - Command type
 */
enum drm_asahi_cmd_type {
        /**
         * @DRM_ASAHI_CMD_RENDER: Render command, executing on the render
         * subqueue. Combined vertex and fragment operation.
         *
         * Followed by a @drm_asahi_cmd_render payload.
         */
        DRM_ASAHI_CMD_RENDER = 0,

        /**
         * @DRM_ASAHI_CMD_COMPUTE: Compute command on the compute subqueue.
         *
         * Followed by a @drm_asahi_cmd_compute payload.
         */
        DRM_ASAHI_CMD_COMPUTE = 1,

        /**
         * @DRM_ASAHI_SET_VERTEX_ATTACHMENTS: Software command to set
         * attachments for subsequent vertex shaders in the same submit.
         *
         * Followed by (possibly multiple) @drm_asahi_attachment payloads.
         */
        DRM_ASAHI_SET_VERTEX_ATTACHMENTS = 2,

        /**
         * @DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS: Software command to set
         * attachments for subsequent fragment shaders in the same submit.
         *
         * Followed by (possibly multiple) @drm_asahi_attachment payloads.
         */
        DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS = 3,

        /**
         * @DRM_ASAHI_SET_COMPUTE_ATTACHMENTS: Software command to set
         * attachments for subsequent compute shaders in the same submit.
         *
         * Followed by (possibly multiple) @drm_asahi_attachment payloads.
         */
        DRM_ASAHI_SET_COMPUTE_ATTACHMENTS = 4,
};

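/*
 * Example (illustrative only): the attachment-setting software commands
 * encode their attachment count implicitly in drm_asahi_cmd_header::size
 * (defined below), and must not use barriers. A sketch for n attachments:
 *
 *      struct drm_asahi_cmd_header hdr = {
 *              .cmd_type = DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS,
 *              .size = n * sizeof(struct drm_asahi_attachment),
 *              .vdm_barrier = DRM_ASAHI_BARRIER_NONE,
 *              .cdm_barrier = DRM_ASAHI_BARRIER_NONE,
 *      };
 *      // followed in the command buffer by n struct drm_asahi_attachment
 */
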
/**
 * enum drm_asahi_priority - Scheduling queue priority.
 *
 * These priorities are forwarded to the firmware to influence firmware
 * scheduling. The exact policy is ultimately decided by firmware, but
 * these enums allow userspace to communicate its intentions.
 */
enum drm_asahi_priority {
        /** @DRM_ASAHI_PRIORITY_LOW: Low priority queue. */
        DRM_ASAHI_PRIORITY_LOW = 0,

        /** @DRM_ASAHI_PRIORITY_MEDIUM: Medium priority queue. */
        DRM_ASAHI_PRIORITY_MEDIUM = 1,

        /**
         * @DRM_ASAHI_PRIORITY_HIGH: High priority queue.
         *
         * Reserved for future extension.
         */
        DRM_ASAHI_PRIORITY_HIGH = 2,

        /**
         * @DRM_ASAHI_PRIORITY_REALTIME: Real-time priority queue.
         *
         * Reserved for future extension.
         */
        DRM_ASAHI_PRIORITY_REALTIME = 3,
};

/**
 * struct drm_asahi_queue_create - Arguments passed to
 * DRM_IOCTL_ASAHI_QUEUE_CREATE
 */
struct drm_asahi_queue_create {
        /** @flags: MBZ */
        __u32 flags;

        /** @vm_id: The ID of the VM this queue is bound to */
        __u32 vm_id;

        /** @priority: One of drm_asahi_priority */
        __u32 priority;

        /** @queue_id: The returned queue ID */
        __u32 queue_id;

        /**
         * @usc_exec_base: GPU base address for all USC binaries (shaders) on
         * this queue. USC addresses are 32-bit relative to this 64-bit base.
         *
         * This sets the following registers on all queue commands:
         *
         *    USC_EXEC_BASE_TA  (vertex)
         *    USC_EXEC_BASE_ISP (fragment)
         *    USC_EXEC_BASE_CP  (compute)
         *
         * While the hardware lets us configure these independently per command,
         * we do not have a use case for this. Instead, we expect userspace to
         * fix a 4GiB VA carveout for USC memory and pass its base address here.
         */
        __u64 usc_exec_base;
};

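/*
 * Example (illustrative only): creating a queue. The USC base here is
 * assumed to be the start of the 4GiB VA carveout that userspace reserved
 * for shader binaries, as described above.
 *
 *      struct drm_asahi_queue_create qc = {
 *              .vm_id = vm_id,
 *              .priority = DRM_ASAHI_PRIORITY_MEDIUM,
 *              .usc_exec_base = usc_base_va,   // base of the USC carveout
 *      };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_QUEUE_CREATE, &qc))
 *              return -errno;
 *      // qc.queue_id now holds the queue ID
 */
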
/**
 * struct drm_asahi_queue_destroy - Arguments passed to
 * DRM_IOCTL_ASAHI_QUEUE_DESTROY
 */
struct drm_asahi_queue_destroy {
        /** @queue_id: The queue ID to be destroyed */
        __u32 queue_id;

        /** @pad: MBZ */
        __u32 pad;
};

/**
 * enum drm_asahi_sync_type - Sync item type
 */
enum drm_asahi_sync_type {
        /** @DRM_ASAHI_SYNC_SYNCOBJ: Binary sync object */
        DRM_ASAHI_SYNC_SYNCOBJ = 0,

        /** @DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ: Timeline sync object */
        DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ = 1,
};

/**
 * struct drm_asahi_sync - Sync item
 */
struct drm_asahi_sync {
        /** @sync_type: One of drm_asahi_sync_type */
        __u32 sync_type;

        /** @handle: The sync object handle */
        __u32 handle;

        /** @timeline_value: Timeline value for timeline sync objects */
        __u64 timeline_value;
};

/**
 * define DRM_ASAHI_BARRIER_NONE - Command index for no barrier
 *
 * This special value may be passed in to drm_asahi_cmd_header::vdm_barrier or
 * drm_asahi_cmd_header::cdm_barrier to indicate that the respective subqueue
 * should not wait on any previous work.
 */
#define DRM_ASAHI_BARRIER_NONE (0xFFFFu)

/**
 * struct drm_asahi_cmd_header - Top level command structure
 *
 * This struct is core to the command buffer definition and therefore is not
 * extensible.
 */
struct drm_asahi_cmd_header {
        /** @cmd_type: One of drm_asahi_cmd_type */
        __u16 cmd_type;

        /**
         * @size: Size of this command, not including this header.
         *
         * For hardware commands, this enables extensibility of commands without
         * requiring extra command types. Passing a command that is shorter
         * than expected is explicitly allowed for backwards-compatibility.
         * Truncated fields will be zeroed.
         *
         * For the synthetic attachment setting commands, this implicitly
         * encodes the number of attachments. These commands take multiple
         * fixed-size @drm_asahi_attachment structures as their payload, so size
         * equals number of attachments * sizeof(struct drm_asahi_attachment).
         */
        __u16 size;

        /**
         * @vdm_barrier: VDM (render) command index to wait on.
         *
         * Barriers are indices relative to the beginning of a given submit. A
         * barrier of 0 waits on commands submitted to the respective subqueue
         * in previous submit ioctls. A barrier of N waits on N previous
         * commands on the subqueue within the current submit ioctl. As a
         * special case, passing @DRM_ASAHI_BARRIER_NONE avoids waiting on any
         * commands in the subqueue.
         *
         * Examples:
         *
         *   0: This waits on all previous work.
         *
         *   NONE: This does not wait for anything on this subqueue.
         *
         *   1: This waits on the first render command in the submit.
         *   This is valid only if there are multiple render commands in the
         *   same submit.
         *
         * Barriers are valid only for hardware commands. Synthetic software
         * commands to set attachments must pass NONE here.
         */
        __u16 vdm_barrier;

        /**
         * @cdm_barrier: CDM (compute) command index to wait on.
         *
         * See @vdm_barrier, and replace VDM/render with CDM/compute.
         */
        __u16 cdm_barrier;
};

/**
 * struct drm_asahi_submit - Arguments passed to DRM_IOCTL_ASAHI_SUBMIT
 */
struct drm_asahi_submit {
        /**
         * @syncs: An optional pointer to an array of drm_asahi_sync. The first
         * @in_sync_count elements are in-syncs, then the remaining
         * @out_sync_count elements are out-syncs. Using a single array with
         * explicit partitioning simplifies handling.
         */
        __u64 syncs;

        /**
         * @cmdbuf: Pointer to the command buffer to submit.
         *
         * This is a flat command buffer. By design, it contains no CPU
         * pointers, which makes it suitable for a virtgpu wire protocol without
         * requiring any serializing/deserializing step.
         *
         * It consists of a series of commands. Each command begins with a
         * fixed-size @drm_asahi_cmd_header header and is followed by a
         * variable-length payload according to the type and size in the header.
         *
         * The combined count of "real" hardware commands must be nonzero and at
         * most drm_asahi_params_global::max_commands_per_submission.
         */
        __u64 cmdbuf;

        /** @flags: Flags for command submission (MBZ) */
        __u32 flags;

        /** @queue_id: The queue ID to be submitted to */
        __u32 queue_id;

        /**
         * @in_sync_count: Number of sync objects to wait on before starting
         * this job.
         */
        __u32 in_sync_count;

        /**
         * @out_sync_count: Number of sync objects to signal upon completion of
         * this job.
         */
        __u32 out_sync_count;

        /** @cmdbuf_size: Command buffer size in bytes */
        __u32 cmdbuf_size;

        /** @pad: MBZ */
        __u32 pad;
};

/**
 * struct drm_asahi_attachment - Describe an "attachment".
 *
 * Attachments are any memory written by shaders, notably including render
 * target attachments written by the end-of-tile program. This is purely a hint
 * about the accessed memory regions. These hints are optional to specify, which
 * is fortunate as they cannot be specified precisely with bindless access
 * anyway. But where possible, userspace should include these hints, which are
 * forwarded to the firmware.
 *
 * This struct is implicitly sized and therefore is not extensible.
 */
struct drm_asahi_attachment {
        /** @pointer: Base address of the attachment */
        __u64 pointer;

        /** @size: Size of the attachment in bytes */
        __u64 size;

        /** @pad: MBZ */
        __u32 pad;

        /** @flags: MBZ */
        __u32 flags;
};

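/*
 * Example (illustrative only): packing a flat command buffer with a single
 * hardware command and submitting it. render_cmd is a struct
 * drm_asahi_cmd_render (defined further below) assumed to be filled by the
 * caller; sync setup is omitted.
 *
 *      __u8 buf[sizeof(struct drm_asahi_cmd_header) +
 *               sizeof(struct drm_asahi_cmd_render)];
 *      struct drm_asahi_cmd_header hdr = {
 *              .cmd_type = DRM_ASAHI_CMD_RENDER,
 *              .size = sizeof(render_cmd),
 *              .vdm_barrier = 0,       // wait on all previously submitted work
 *              .cdm_barrier = DRM_ASAHI_BARRIER_NONE,
 *      };
 *
 *      memcpy(buf, &hdr, sizeof(hdr));
 *      memcpy(buf + sizeof(hdr), &render_cmd, sizeof(render_cmd));
 *
 *      struct drm_asahi_submit submit = {
 *              .cmdbuf = (__u64)(uintptr_t)buf,
 *              .cmdbuf_size = sizeof(buf),
 *              .queue_id = queue_id,
 *      };
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_SUBMIT, &submit))
 *              return -errno;
 */
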
/**
 * enum drm_asahi_render_flags - Flags for render commands
 */
enum drm_asahi_render_flags {
        /**
         * @DRM_ASAHI_RENDER_VERTEX_SCRATCH: A vertex stage shader uses scratch
         * memory.
         */
        DRM_ASAHI_RENDER_VERTEX_SCRATCH = (1U << 0),

        /**
         * @DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES: Process even empty tiles.
         * This must be set when clearing render targets.
         */
        DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES = (1U << 1),

        /**
         * @DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING: Run vertex stage on a single
         * cluster (on multi-cluster GPUs)
         *
         * This harms performance but can work around certain sync/coherency
         * bugs, and therefore is useful for debugging.
         */
        DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING = (1U << 2),

        /**
         * @DRM_ASAHI_RENDER_DBIAS_IS_INT: Use integer depth bias formula.
         *
         * Graphics specifications contain two alternate formulas for depth
         * bias, a float formula used with floating-point depth buffers and an
         * integer formula used with unorm depth buffers. This flag specifies
         * that the integer formula should be used. If omitted, the float
         * formula is used instead.
         *
         * This corresponds to bit 18 of the relevant hardware control register,
         * so we match that here for efficiency.
         */
        DRM_ASAHI_RENDER_DBIAS_IS_INT = (1U << 18),
};

/**
 * struct drm_asahi_zls_buffer - Describe a depth or stencil buffer.
 *
 * These fields correspond to hardware registers in the ZLS (Z Load/Store)
 * unit. There are three hardware registers for each field, used respectively
 * for loads, stores, and partial renders. In practice, it makes sense to set
 * all three to the same values, except in exceptional cases not yet
 * implemented in userspace, so we do not duplicate them here, for
 * simplicity/efficiency.
 *
 * This struct is embedded in other structs and therefore is not extensible.
 */
struct drm_asahi_zls_buffer {
        /** @base: Base address of the buffer */
        __u64 base;

        /**
         * @comp_base: If the load buffer is compressed, address of the
         * compression metadata section.
         */
        __u64 comp_base;

        /**
         * @stride: If layered rendering is enabled, the number of bytes
         * between each layer of the buffer.
         */
        __u32 stride;

        /**
         * @comp_stride: If layered rendering is enabled, the number of bytes
         * between each layer of the compression metadata.
         */
        __u32 comp_stride;
};

/**
 * struct drm_asahi_timestamp - Describe a timestamp write.
 *
 * The firmware can optionally write the GPU timestamp at render pass
 * granularities, but it needs to be mapped specially via
 * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT. This structure therefore describes where to
 * write as a handle-offset pair, rather than a GPU address like normal.
 *
 * This struct is embedded in other structs and therefore is not extensible.
 */
struct drm_asahi_timestamp {
        /**
         * @handle: Handle of the timestamp buffer, or 0 to skip this
         * timestamp. If nonzero, this must equal the value returned in
         * drm_asahi_gem_bind_object::object_handle.
         */
        __u32 handle;

        /** @offset: Offset to write into the timestamp buffer */
        __u32 offset;
};

/**
 * struct drm_asahi_timestamps - Describe timestamp writes.
 *
 * Each operation that can be timestamped can be timestamped at both the start
 * and the end. Therefore, drm_asahi_timestamp structs always come in pairs,
 * bundled together into drm_asahi_timestamps.
 *
 * This struct is embedded in other structs and therefore is not extensible.
 */
struct drm_asahi_timestamps {
        /** @start: Timestamp recorded at the start of the operation */
        struct drm_asahi_timestamp start;

        /** @end: Timestamp recorded at the end of the operation */
        struct drm_asahi_timestamp end;
};

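/*
 * Example (illustrative only): mapping a BO as a timestamp buffer and
 * pointing a command's start/end timestamps at consecutive 64-bit slots in
 * it (the slot layout is userspace's choice). Conversion to seconds uses
 * drm_asahi_params_global::command_timestamp_frequency_hz.
 *
 *      struct drm_asahi_gem_bind_object bind = {
 *              .op = DRM_ASAHI_BIND_OBJECT_OP_BIND,
 *              .flags = DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS,
 *              .handle = ts_bo_handle,
 *              .range = ts_bo_size,
 *      };
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &bind))
 *              return -errno;
 *
 *      struct drm_asahi_timestamps ts = {
 *              .start = { .handle = bind.object_handle, .offset = 0 },
 *              .end   = { .handle = bind.object_handle, .offset = 8 },
 *      };
 *      // later: seconds = (end - start) /
 *      //      (double)params.command_timestamp_frequency_hz;
 */
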
/**
 * struct drm_asahi_helper_program - Describe helper program configuration.
 *
 * The helper program is a compute-like kernel required for various hardware
 * functionality. Its most important role is dynamically allocating
 * scratch/stack memory for individual subgroups, by partitioning a static
 * allocation shared for the whole device. It is supplied by userspace via
 * drm_asahi_helper_program and internally dispatched by the hardware as
 * needed.
 *
 * This struct is embedded in other structs and therefore is not extensible.
 */
struct drm_asahi_helper_program {
        /**
         * @binary: USC address of the helper program binary. This is a tagged
         * pointer with configuration in the bottom bits.
         */
        __u32 binary;

        /** @cfg: Additional configuration bits for the helper program. */
        __u32 cfg;

        /**
         * @data: Data passed to the helper program. This value is not
         * interpreted by the kernel, firmware, or hardware in any way. It is
         * simply a sideband for userspace, set with the submit ioctl and read
         * via special registers inside the helper program.
         *
         * In practice, userspace will pass a 64-bit GPU VA here pointing to the
         * actual arguments, which presumably don't fit in 64 bits.
         */
        __u64 data;
};

/**
 * struct drm_asahi_bg_eot - Describe a background or end-of-tile program.
 *
 * The background and end-of-tile programs are dispatched by the hardware at
 * the beginning and end of rendering. As the hardware "tilebuffer" is simply
 * local memory, these programs are necessary to implement API-level render
 * targets. The fragment-like background program is responsible for loading
 * either the clear colour or the existing render target contents, while the
 * compute-like end-of-tile program stores the tilebuffer contents to memory.
 *
 * This struct is embedded in other structs and therefore is not extensible.
 */
struct drm_asahi_bg_eot {
        /**
         * @usc: USC address of the hardware USC words binding resources
         * (including images and uniforms) and the program itself. Note this is
         * an additional layer of indirection compared to the helper program,
         * avoiding the need for a sideband for data. This is a tagged pointer
         * with additional configuration in the bottom bits.
         */
        __u32 usc;

        /**
         * @rsrc_spec: Resource specifier for the program. This is a packed
         * hardware data structure describing the required number of registers,
         * uniforms, bound textures, and bound samplers.
         */
        __u32 rsrc_spec;
};

/**
 * struct drm_asahi_cmd_render - Command to submit 3D
 *
 * This command submits a single render pass. The hardware control stream may
 * include many draws and subpasses, but within the command, the framebuffer
 * dimensions and attachments are fixed.
 *
 * The hardware requires the firmware to set a large number of Control
 * Registers setting up state at render pass granularity before each command
 * rendering 3D. The firmware bundles this state into data structures.
 * Unfortunately, we cannot expose any of that directly to userspace, because
 * the kernel-firmware ABI is not stable. Although we can guarantee the
 * firmware updates in tandem with the kernel, we cannot break old userspace
 * when upgrading the firmware and kernel. Therefore, we need to abstract the
 * data structures well, to avoid tying our hands with future firmware
 * versions.
 *
 * The bulk of drm_asahi_cmd_render therefore consists of values of hardware
 * control registers, marshalled via the firmware interface.
 *
 * The framebuffer/tilebuffer dimensions are also specified here. In addition
 * to being passed to the firmware/hardware, the kernel requires these
 * dimensions to calculate various essential tiling-related data structures.
 * It is unfortunate that our submits are heavier than on vendors with saner
 * hardware-software interfaces. The upshot is that all of this information is
 * readily available to userspace with all current APIs.
 *
 * It looks odd, but it's not overly burdensome, and it ensures we can remain
 * compatible with old userspace.
 */
struct drm_asahi_cmd_render {
        /** @flags: Combination of drm_asahi_render_flags flags. */
        __u32 flags;

        /**
         * @isp_zls_pixels: ISP_ZLS_PIXELS register value. This contains the
         * depth/stencil width/height, which may differ from the framebuffer
         * width/height.
         */
        __u32 isp_zls_pixels;

        /**
         * @vdm_ctrl_stream_base: VDM_CTRL_STREAM_BASE register value. GPU
         * address to the beginning of the VDM control stream.
         */
        __u64 vdm_ctrl_stream_base;

        /** @vertex_helper: Helper program used for the vertex shader */
        struct drm_asahi_helper_program vertex_helper;

        /** @fragment_helper: Helper program used for the fragment shader */
        struct drm_asahi_helper_program fragment_helper;

        /**
         * @isp_scissor_base: ISP_SCISSOR_BASE register value. GPU address of an
         * array of scissor descriptors indexed in the render pass.
         */
        __u64 isp_scissor_base;

        /**
         * @isp_dbias_base: ISP_DBIAS_BASE register value. GPU address of an
         * array of depth bias values indexed in the render pass.
         */
        __u64 isp_dbias_base;

        /**
         * @isp_oclqry_base: ISP_OCLQRY_BASE register value. GPU address of an
         * array of occlusion query results written by the render pass.
         */
        __u64 isp_oclqry_base;

        /** @depth: Depth buffer */
        struct drm_asahi_zls_buffer depth;

        /** @stencil: Stencil buffer */
        struct drm_asahi_zls_buffer stencil;

        /** @zls_ctrl: ZLS_CTRL register value */
        __u64 zls_ctrl;

        /** @ppp_multisamplectl: PPP_MULTISAMPLECTL register value */
        __u64 ppp_multisamplectl;

        /**
         * @sampler_heap: Base address of the sampler heap. This heap is used
         * for both vertex shaders and fragment shaders. The registers are
         * per-stage, but there is no known use case for separate heaps.
         */
        __u64 sampler_heap;

        /** @ppp_ctrl: PPP_CTRL register value */
        __u32 ppp_ctrl;

        /** @width_px: Framebuffer width in pixels */
        __u16 width_px;

        /** @height_px: Framebuffer height in pixels */
        __u16 height_px;

        /** @layers: Number of layers in the framebuffer */
        __u16 layers;

        /** @sampler_count: Number of samplers in the sampler heap. */
        __u16 sampler_count;

        /** @utile_width_px: Width of a logical tilebuffer tile in pixels */
        __u8 utile_width_px;

        /** @utile_height_px: Height of a logical tilebuffer tile in pixels */
        __u8 utile_height_px;

        /** @samples: # of samples in the framebuffer. Must be 1, 2, or 4. */
        __u8 samples;

        /** @sample_size_B: # of bytes in the tilebuffer required per sample. */
        __u8 sample_size_B;

        /**
         * @isp_merge_upper_x: 32-bit float used in the hardware triangle
         * merging. Calculate as: tan(60 deg) * width.
         *
         * Making these values UAPI avoids requiring floating-point calculations
         * in the kernel in the hot path.
         */
        __u32 isp_merge_upper_x;

        /**
         * @isp_merge_upper_y: 32-bit float. Calculate as: tan(60 deg) * height.
         * See @isp_merge_upper_x.
         */
        __u32 isp_merge_upper_y;

        /** @bg: Background program run for each tile at the start */
        struct drm_asahi_bg_eot bg;

        /** @eot: End-of-tile program run for each tile at the end */
        struct drm_asahi_bg_eot eot;

        /**
         * @partial_bg: Background program run at the start of each tile when
         * resuming the render pass during a partial render.
         */
        struct drm_asahi_bg_eot partial_bg;

        /**
         * @partial_eot: End-of-tile program run at the end of each tile when
         * pausing the render pass during a partial render.
         */
        struct drm_asahi_bg_eot partial_eot;

        /**
         * @isp_bgobjdepth: ISP_BGOBJDEPTH register value. This is the depth
         * buffer clear value, encoded in the depth buffer's format: either a
         * 32-bit float or a 16-bit unorm (with upper bits zeroed).
         */
        __u32 isp_bgobjdepth;

        /**
         * @isp_bgobjvals: ISP_BGOBJVALS register value. The bottom 8 bits
         * contain the stencil buffer clear value.
         */
        __u32 isp_bgobjvals;

        /** @ts_vtx: Timestamps for the vertex portion of the render */
        struct drm_asahi_timestamps ts_vtx;

        /** @ts_frag: Timestamps for the fragment portion of the render */
        struct drm_asahi_timestamps ts_frag;
};

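/*
 * Example (illustrative only): computing the ISP merge register values for
 * drm_asahi_cmd_render. The kernel takes the raw 32-bit float pattern, so
 * userspace performs the floating-point math:
 *
 *      float mx = tanf(60.0f * (float)M_PI / 180.0f) * (float)width_px;
 *      float my = tanf(60.0f * (float)M_PI / 180.0f) * (float)height_px;
 *      __u32 merge_x, merge_y;
 *
 *      memcpy(&merge_x, &mx, sizeof(merge_x)); // bit-cast float to __u32
 *      memcpy(&merge_y, &my, sizeof(merge_y));
 *      cmd.isp_merge_upper_x = merge_x;
 *      cmd.isp_merge_upper_y = merge_y;
 */
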
/**
 * struct drm_asahi_cmd_compute - Command to submit compute
 *
 * This command submits a control stream consisting of compute dispatches.
 * There is essentially no limit on how many compute dispatches may be included
 * in a single compute command, although timestamps are at command granularity.
 */
struct drm_asahi_cmd_compute {
        /** @flags: MBZ */
        __u32 flags;

        /** @sampler_count: Number of samplers in the sampler heap. */
        __u32 sampler_count;

        /**
         * @cdm_ctrl_stream_base: CDM_CTRL_STREAM_BASE register value. GPU
         * address to the beginning of the CDM control stream.
         */
        __u64 cdm_ctrl_stream_base;

        /**
         * @cdm_ctrl_stream_end: GPU base address to the end of the hardware
         * control stream. Note this only considers the first contiguous segment
         * of the control stream, as the stream might jump elsewhere.
         */
        __u64 cdm_ctrl_stream_end;

        /** @sampler_heap: Base address of the sampler heap. */
        __u64 sampler_heap;

        /** @helper: Helper program used for this compute command */
        struct drm_asahi_helper_program helper;

        /** @ts: Timestamps for the compute command */
        struct drm_asahi_timestamps ts;
};

/**
 * struct drm_asahi_get_time - Arguments passed to DRM_IOCTL_ASAHI_GET_TIME
 */
struct drm_asahi_get_time {
        /** @flags: MBZ. */
        __u64 flags;

        /** @gpu_timestamp: On return, the GPU timestamp in nanoseconds. */
        __u64 gpu_timestamp;
};

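/*
 * Example (illustrative only): sampling the GPU time, e.g. to correlate
 * command timestamps with CPU time:
 *
 *      struct drm_asahi_get_time gt = { 0 };
 *
 *      if (drmIoctl(fd, DRM_IOCTL_ASAHI_GET_TIME, &gt))
 *              return -errno;
 *      // gt.gpu_timestamp is the current GPU time in nanoseconds
 */
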
/**
 * DRM_IOCTL_ASAHI() - Build an Asahi IOCTL number
 * @__access: Access type. Must be R, W or RW.
 * @__id: One of the DRM_ASAHI_xxx IDs.
 * @__type: Suffix of the type being passed to the IOCTL.
 *
 * Don't use this macro directly, use the DRM_IOCTL_ASAHI_xxx
 * values instead.
 *
 * Return: An IOCTL number to be passed to ioctl() from userspace.
 */
#define DRM_IOCTL_ASAHI(__access, __id, __type) \
        DRM_IO ## __access(DRM_COMMAND_BASE + DRM_ASAHI_ ## __id, \
                           struct drm_asahi_ ## __type)

/* Note: this is an enum so that it can be resolved by Rust bindgen. */
enum {
        DRM_IOCTL_ASAHI_GET_PARAMS      = DRM_IOCTL_ASAHI(W, GET_PARAMS, get_params),
        DRM_IOCTL_ASAHI_GET_TIME        = DRM_IOCTL_ASAHI(WR, GET_TIME, get_time),
        DRM_IOCTL_ASAHI_VM_CREATE       = DRM_IOCTL_ASAHI(WR, VM_CREATE, vm_create),
        DRM_IOCTL_ASAHI_VM_DESTROY      = DRM_IOCTL_ASAHI(W, VM_DESTROY, vm_destroy),
        DRM_IOCTL_ASAHI_VM_BIND         = DRM_IOCTL_ASAHI(W, VM_BIND, vm_bind),
        DRM_IOCTL_ASAHI_GEM_CREATE      = DRM_IOCTL_ASAHI(WR, GEM_CREATE, gem_create),
        DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET = DRM_IOCTL_ASAHI(WR, GEM_MMAP_OFFSET, gem_mmap_offset),
        DRM_IOCTL_ASAHI_GEM_BIND_OBJECT = DRM_IOCTL_ASAHI(WR, GEM_BIND_OBJECT, gem_bind_object),
        DRM_IOCTL_ASAHI_QUEUE_CREATE    = DRM_IOCTL_ASAHI(WR, QUEUE_CREATE, queue_create),
        DRM_IOCTL_ASAHI_QUEUE_DESTROY   = DRM_IOCTL_ASAHI(W, QUEUE_DESTROY, queue_destroy),
        DRM_IOCTL_ASAHI_SUBMIT          = DRM_IOCTL_ASAHI(W, SUBMIT, submit),
};

#if defined(__cplusplus)
}
#endif

#endif /* _ASAHI_DRM_H_ */