// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because both the CPU and the GPU can access this structure, it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE	32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA.
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment jobs to finish instead of
	 * allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL	128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @vm: VM this pool is bound to. */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;

	/** @size: Size of all chunks across all heaps in the pool. */
	atomic_t size;
};
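
/*
 * Slot layout sketch (illustrative only, derived from the helpers below):
 * the @gpu_contexts BO is carved into MAX_HEAPS_PER_POOL fixed-size slots,
 * one per XArray id, each padded to the GPU L2 cache line size:
 *
 *	ctx_gpu_va(id) = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
 *			 id * ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size)
 *
 * panthor_heap_ctx_stride() and panthor_get_heap_ctx_offset() compute the
 * stride and per-slot offset used in this formula.
 */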

static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 l2_features = ptdev->gpu_info.l2_features;
	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
	return pool->gpu_contexts->kmap +
	       panthor_get_heap_ctx_offset(pool, id);
}

static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	list_del(&chunk->node);
	heap->chunk_count--;
	mutex_unlock(&heap->lock);

	atomic_sub(heap->chunk_size, &pool->size);

	panthor_kernel_bo_destroy(chunk->bo);
	kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	atomic_add(heap->chunk_size, &pool->size);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}
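
/*
 * Chunk pointer encoding note (illustrative, derived from the code in this
 * file): chunk GPU VAs are at least 4k aligned, so the low 12 bits are
 * reused to carry the chunk size in 4k units:
 *
 *	encoded = (chunk_gpu_va & GENMASK_ULL(63, 12)) | (chunk_size >> 12);
 *
 * panthor_alloc_heap_chunk() stores this form in the chunk header's @next
 * field, panthor_heap_grow() returns it through @new_chunk_gpu_va, and
 * panthor_heap_return_chunk() masks the low bits off before comparing VAs.
 */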

static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
		panthor_free_heap_chunk(pool, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	int ret;
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		ret = panthor_alloc_heap_chunk(pool, heap, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap;

	heap = xa_erase(&pool->xa, handle);
	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a valid handle (>= 0) on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed, we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
	if (ret)
		goto err_free_heap;

	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* The pool has been destroyed, we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
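
/*
 * Usage sketch (hypothetical caller, error handling elided; the values below
 * are arbitrary but respect the constraints documented on
 * panthor_heap_create()):
 *
 *	u64 ctx_gpu_va, first_chunk_gpu_va;
 *	int handle;
 *
 *	handle = panthor_heap_create(pool, 1, SZ_2M, 64, 8,
 *				     &ctx_gpu_va, &first_chunk_gpu_va);
 *	if (handle < 0)
 *		return handle;
 *
 *	...
 *
 *	panthor_heap_destroy(pool, handle);
 */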

/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			atomic_sub(heap->chunk_size, &pool->size);
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		panthor_kernel_bo_destroy(chunk->bo);
		kfree(chunk);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}
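
/*
 * Note (illustrative): panthor_heap_return_chunk() and panthor_heap_grow()
 * both recover the heap id from the heap context GPU VA passed back by the
 * FW, by inverting the slot formula used at creation time:
 *
 *	heap_id = (heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts)) /
 *		  panthor_heap_ctx_stride(pool->ptdev);
 *
 * Out-of-range offsets are rejected with -EINVAL before the XArray lookup.
 */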

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks
 *   or if too many render passes are in-flight
 *   or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* If we reached the target in-flight render passes, or if we
	 * reached the maximum number of chunks, let the FW figure out another
	 * way to find some memory (wait for render passes to finish, or call
	 * the exception handler provided by the userspace driver, if any).
	 */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
	 * which goes through the blocking allocation path. Ultimately, we
	 * want a non-blocking allocation, so we can immediately report to the
	 * FW when the system is running out of memory. In that case, the FW
	 * can call a user-provided exception handler, which might try to free
	 * some tiler memory by issuing an intermediate fragment job. If the
	 * exception handler can't do anything, it will flag the queue as
	 * faulty so the job that triggered this tiler chunk allocation and all
	 * further jobs in this queue fail immediately instead of having to
	 * wait for the job timeout.
	 */
	ret = panthor_alloc_heap_chunk(pool, heap, false);
	if (ret)
		goto out_unlock;

	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}

static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_get(&pool->refcount);

	return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, a negative error code otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/* We want a weak ref here: the heap pool belongs to the VM, so we're
	 * sure that, as long as the heap pool exists, the VM exists too.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	atomic_add(pool->gpu_contexts->obj->size, &pool->size);

	return pool;

err_destroy_pool:
	panthor_heap_pool_destroy(pool);
	return ERR_PTR(ret);
}
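
/*
 * Pool lifetime sketch (hypothetical caller, error handling elided):
 *
 *	pool = panthor_heap_pool_create(ptdev, vm);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 *	...
 *
 *	panthor_heap_pool_destroy(pool);
 *
 * panthor_heap_pool_destroy() drops the initial reference taken at creation
 * time; any extra references taken with panthor_heap_pool_get() must be
 * released with panthor_heap_pool_put() before the pool memory is freed.
 */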

/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
		atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
		panthor_kernel_bo_destroy(pool->gpu_contexts);
	}

	/* Reflects the fact the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}

/**
 * panthor_heap_pool_size() - Get a heap pool's total size
 * @pool: Pool whose total chunk size to return.
 *
 * Return: The aggregated size of all chunks for all heaps in the pool.
 */
size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
{
	if (!pool)
		return 0;

	return atomic_read(&pool->size);
}