// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christian König
 */

/* Pooling of allocated pages is necessary because, on x86, changing the
 * caching attributes of the linear mapping requires a costly cross-CPU TLB
 * invalidate for those addresses.
 *
 * In addition to that, allocations from the DMA coherent API are pooled as
 * well because they are rather slow compared to alloc_pages+map.
 */

#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>

#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif

#include <drm/ttm/ttm_backup.h>
#include <drm/ttm/ttm_pool.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/ttm/ttm_bo.h>

#include "ttm_module.h"

#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>
static DECLARE_FAULT_ATTR(backup_fault_inject);
#else
#define should_fail(...) false
#endif

/**
 * struct ttm_pool_dma - Helper object for coherent DMA mappings
 *
 * @addr: original DMA address returned for the mapping
 * @vaddr: original vaddr returned for the mapping, with the allocation order
 * stored in the lower bits
 */
struct ttm_pool_dma {
	dma_addr_t addr;
	unsigned long vaddr;
};
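
/*
 * Illustration (not part of the allocator, the values are made up): the CPU
 * address returned by dma_alloc_attrs() is page aligned, so the low bits of
 * @vaddr are free to hold the allocation order. Assuming a hypothetical
 * vaddr of 0xffff888012340000 and order 3:
 *
 *	dma->vaddr = (unsigned long)vaddr | order;	// 0xffff888012340003
 *	vaddr = (void *)(dma->vaddr & PAGE_MASK);	// 0xffff888012340000
 *	order = dma->vaddr & ~PAGE_MASK;		// 3
 *
 * See ttm_pool_alloc_page(), ttm_pool_free_page() and ttm_pool_page_order().
 */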

/**
 * struct ttm_pool_alloc_state - Current state of the tt page allocation process
 * @pages: Pointer to the next tt page pointer to populate.
 * @caching_divide: Pointer to the first page pointer whose page has a staged but
 * not committed caching transition from write-back to @tt_caching.
 * @dma_addr: Pointer to the next tt dma_address entry to populate if any.
 * @remaining_pages: Remaining pages to populate.
 * @tt_caching: The requested cpu-caching for the pages allocated.
 */
struct ttm_pool_alloc_state {
	struct page **pages;
	struct page **caching_divide;
	dma_addr_t *dma_addr;
	pgoff_t remaining_pages;
	enum ttm_caching tt_caching;
};

/**
 * struct ttm_pool_tt_restore - State representing restore from backup
 * @pool: The pool used for page allocation while restoring.
 * @snapshot_alloc: A snapshot of the most recent struct ttm_pool_alloc_state.
 * @alloced_page: Pointer to the page most recently allocated from a pool or system.
 * @first_dma: The dma address corresponding to @alloced_page if DMA mapping
 * is requested.
 * @alloced_pages: The number of allocated pages present in the struct ttm_tt
 * page vector from this restore session.
 * @restored_pages: The number of 4K pages restored for @alloced_page (which
 * is typically a multi-order page).
 * @page_caching: The caching requested for the struct ttm_tt.
 * @order: The order of @alloced_page.
 *
 * Recovery from backup might fail when we've recovered less than the
 * full ttm_tt. In order not to lose any data (yet), keep information
 * around that allows us to restart a failed ttm backup recovery.
 */
struct ttm_pool_tt_restore {
	struct ttm_pool *pool;
	struct ttm_pool_alloc_state snapshot_alloc;
	struct page *alloced_page;
	dma_addr_t first_dma;
	pgoff_t alloced_pages;
	pgoff_t restored_pages;
	enum ttm_caching page_caching;
	unsigned int order;
};

static unsigned long page_pool_size;

MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
module_param(page_pool_size, ulong, 0644);

static atomic_long_t allocated_pages;

static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS];
static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS];

static struct ttm_pool_type global_dma32_write_combined[NR_PAGE_ORDERS];
static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS];

static spinlock_t shrinker_lock;
static struct list_head shrinker_list;
static struct shrinker *mm_shrinker;
static DECLARE_RWSEM(pool_shrink_rwsem);

/* Allocate pages of size 1 << order with the given gfp_flags */
static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
					unsigned int order)
{
	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
	struct ttm_pool_dma *dma;
	struct page *p;
	void *vaddr;

	/* Don't set the __GFP_COMP flag for higher order allocations.
	 * Mapping pages directly into a userspace process and calling
	 * put_page() on a TTM allocated page is illegal.
	 */
	if (order)
		gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
			__GFP_THISNODE;

	if (!pool->use_dma_alloc) {
		p = alloc_pages_node(pool->nid, gfp_flags, order);
		if (p)
			p->private = order;
		return p;
	}

	dma = kmalloc(sizeof(*dma), GFP_KERNEL);
	if (!dma)
		return NULL;

	if (order)
		attr |= DMA_ATTR_NO_WARN;

	vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
				&dma->addr, gfp_flags, attr);
	if (!vaddr)
		goto error_free;

	/* TODO: This is an illegal abuse of the DMA API, but we need to rework
	 * TTM page fault handling and extend the DMA API to clean this up.
	 */
	if (is_vmalloc_addr(vaddr))
		p = vmalloc_to_page(vaddr);
	else
		p = virt_to_page(vaddr);

	dma->vaddr = (unsigned long)vaddr | order;
	p->private = (unsigned long)dma;
	return p;

error_free:
	kfree(dma);
	return NULL;
}

/* Reset the caching of pages of size 1 << order and free them */
static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
			       unsigned int order, struct page *p)
{
	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
	struct ttm_pool_dma *dma;
	void *vaddr;

#ifdef CONFIG_X86
	/* We don't care that set_pages_wb is inefficient here. This is only
	 * used when we have to shrink and CPU overhead is irrelevant then.
	 */
	if (caching != ttm_cached && !PageHighMem(p))
		set_pages_wb(p, 1 << order);
#endif

	if (!pool || !pool->use_dma_alloc) {
		__free_pages(p, order);
		return;
	}

	if (order)
		attr |= DMA_ATTR_NO_WARN;

	dma = (void *)p->private;
	vaddr = (void *)(dma->vaddr & PAGE_MASK);
	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
		       attr);
	kfree(dma);
}

/* Apply any cpu-caching deferred during page allocation */
static int ttm_pool_apply_caching(struct ttm_pool_alloc_state *alloc)
{
#ifdef CONFIG_X86
	unsigned int num_pages = alloc->pages - alloc->caching_divide;

	if (!num_pages)
		return 0;

	switch (alloc->tt_caching) {
	case ttm_cached:
		break;
	case ttm_write_combined:
		return set_pages_array_wc(alloc->caching_divide, num_pages);
	case ttm_uncached:
		return set_pages_array_uc(alloc->caching_divide, num_pages);
	}
#endif
	alloc->caching_divide = alloc->pages;
	return 0;
}

/* DMA map pages of 1 << order size and return the resulting dma_address. */
static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
			struct page *p, dma_addr_t *dma_addr)
{
	dma_addr_t addr;

	if (pool->use_dma_alloc) {
		struct ttm_pool_dma *dma = (void *)p->private;

		addr = dma->addr;
	} else {
		size_t size = (1ULL << order) * PAGE_SIZE;

		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
		if (dma_mapping_error(pool->dev, addr))
			return -EFAULT;
	}

	*dma_addr = addr;

	return 0;
}

/* Unmap pages of 1 << order size */
static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
			   unsigned int num_pages)
{
	/* Unmapped while freeing the page */
	if (pool->use_dma_alloc)
		return;

	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
		       DMA_BIDIRECTIONAL);
}

/* Give pages back to a specific pool_type */
static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
{
	unsigned int i, num_pages = 1 << pt->order;

	for (i = 0; i < num_pages; ++i) {
		if (PageHighMem(p))
			clear_highpage(p + i);
		else
			clear_page(page_address(p + i));
	}

	spin_lock(&pt->lock);
	list_add(&p->lru, &pt->pages);
	spin_unlock(&pt->lock);
	atomic_long_add(1 << pt->order, &allocated_pages);
}

/* Take pages from a specific pool_type, return NULL when nothing available */
static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
{
	struct page *p;

	spin_lock(&pt->lock);
	p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
	if (p) {
		atomic_long_sub(1 << pt->order, &allocated_pages);
		list_del(&p->lru);
	}
	spin_unlock(&pt->lock);

	return p;
}

/* Initialize and add a pool type to the global shrinker list */
static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
			       enum ttm_caching caching, unsigned int order)
{
	pt->pool = pool;
	pt->caching = caching;
	pt->order = order;
	spin_lock_init(&pt->lock);
	INIT_LIST_HEAD(&pt->pages);

	spin_lock(&shrinker_lock);
	list_add_tail(&pt->shrinker_list, &shrinker_list);
	spin_unlock(&shrinker_lock);
}

/* Remove a pool_type from the global shrinker list and free all pages */
static void ttm_pool_type_fini(struct ttm_pool_type *pt)
{
	struct page *p;

	spin_lock(&shrinker_lock);
	list_del(&pt->shrinker_list);
	spin_unlock(&shrinker_lock);

	while ((p = ttm_pool_type_take(pt)))
		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
}

/* Return the pool_type to use for the given caching and order */
static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
						  enum ttm_caching caching,
						  unsigned int order)
{
	if (pool->use_dma_alloc)
		return &pool->caching[caching].orders[order];

#ifdef CONFIG_X86
	switch (caching) {
	case ttm_write_combined:
		if (pool->nid != NUMA_NO_NODE)
			return &pool->caching[caching].orders[order];

		if (pool->use_dma32)
			return &global_dma32_write_combined[order];

		return &global_write_combined[order];
	case ttm_uncached:
		if (pool->nid != NUMA_NO_NODE)
			return &pool->caching[caching].orders[order];

		if (pool->use_dma32)
			return &global_dma32_uncached[order];

		return &global_uncached[order];
	default:
		break;
	}
#endif

	return NULL;
}

/* Free pages using the global shrinker list */
static unsigned int ttm_pool_shrink(void)
{
	struct ttm_pool_type *pt;
	unsigned int num_pages;
	struct page *p;

	down_read(&pool_shrink_rwsem);
	spin_lock(&shrinker_lock);
	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
	list_move_tail(&pt->shrinker_list, &shrinker_list);
	spin_unlock(&shrinker_lock);

	p = ttm_pool_type_take(pt);
	if (p) {
		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
		num_pages = 1 << pt->order;
	} else {
		num_pages = 0;
	}
	up_read(&pool_shrink_rwsem);

	return num_pages;
}

/* Return the allocation order for a page */
static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
{
	if (pool->use_dma_alloc) {
		struct ttm_pool_dma *dma = (void *)p->private;

		return dma->vaddr & ~PAGE_MASK;
	}

	return p->private;
}

/*
 * Split larger pages so that we can free each PAGE_SIZE page as soon
 * as it has been backed up, in order to avoid memory pressure during
 * reclaim.
 */
static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
{
	unsigned int order = ttm_pool_page_order(pool, p);
	pgoff_t nr;

	if (!order)
		return;

	split_page(p, order);
	nr = 1UL << order;
	while (nr--)
		(p++)->private = 0;
}

/**
 * DOC: Partial backup and restoration of a struct ttm_tt.
 *
 * Swapout using ttm_backup_backup_page() and swapin using
 * ttm_backup_copy_page() may fail.
 * The former most likely due to lack of swap-space or memory, the latter due
 * to lack of memory or because of signal interruption during waits.
 *
 * Backup failure is easily handled by using a ttm_tt pages vector that holds
 * both backup handles and page pointers. This has to be taken into account when
 * restoring such a ttm_tt from backup, and when freeing it while backed up.
 * When restoring, for simplicity, new pages are actually allocated from the
 * pool and the contents of any old pages are copied in and then the old pages
 * are released.
 *
 * For restoration failures, the struct ttm_pool_tt_restore holds sufficient state
 * to be able to resume an interrupted restore, and that structure is freed once
 * the restoration is complete. If the struct ttm_tt is destroyed while there
 * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
 * care of.
 */

/* Is restore ongoing for the currently allocated page? */
static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
{
	return restore && restore->restored_pages < (1 << restore->order);
}

/* DMA unmap and free a multi-order page, either to the relevant pool or to system. */
static pgoff_t ttm_pool_unmap_and_free(struct ttm_pool *pool, struct page *page,
				       const dma_addr_t *dma_addr, enum ttm_caching caching)
{
	struct ttm_pool_type *pt = NULL;
	unsigned int order;
	pgoff_t nr;

	if (pool) {
		order = ttm_pool_page_order(pool, page);
		nr = (1UL << order);
		if (dma_addr)
			ttm_pool_unmap(pool, *dma_addr, nr);

		pt = ttm_pool_select_type(pool, caching, order);
	} else {
		order = page->private;
		nr = (1UL << order);
	}

	if (pt)
		ttm_pool_type_give(pt, page);
	else
		ttm_pool_free_page(pool, caching, order, page);

	return nr;
}

/* Populate the page-array using the most recent allocated multi-order page. */
static void ttm_pool_allocated_page_commit(struct page *allocated,
					   dma_addr_t first_dma,
					   struct ttm_pool_alloc_state *alloc,
					   pgoff_t nr)
{
	pgoff_t i;

	for (i = 0; i < nr; ++i)
		*alloc->pages++ = allocated++;

	alloc->remaining_pages -= nr;

	if (!alloc->dma_addr)
		return;

	for (i = 0; i < nr; ++i) {
		*alloc->dma_addr++ = first_dma;
		first_dma += PAGE_SIZE;
	}
}

/*
 * When restoring, restore backed-up content to the newly allocated page and,
 * if successful, populate the page and dma-address arrays.
 */
static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore,
				   struct ttm_backup *backup,
				   const struct ttm_operation_ctx *ctx,
				   struct ttm_pool_alloc_state *alloc)
{
	pgoff_t i, nr = 1UL << restore->order;
	struct page **first_page = alloc->pages;
	struct page *p;
	int ret = 0;

	for (i = restore->restored_pages; i < nr; ++i) {
		p = first_page[i];
		if (ttm_backup_page_ptr_is_handle(p)) {
			unsigned long handle = ttm_backup_page_ptr_to_handle(p);

			if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
			    should_fail(&backup_fault_inject, 1)) {
				ret = -EINTR;
				break;
			}

			if (handle == 0) {
				restore->restored_pages++;
				continue;
			}

			ret = ttm_backup_copy_page(backup, restore->alloced_page + i,
						   handle, ctx->interruptible);
			if (ret)
				break;

			ttm_backup_drop(backup, handle);
		} else if (p) {
			/*
			 * We could probably avoid splitting the old page
			 * using clever logic, but ATM we don't care, as
			 * we prioritize releasing memory ASAP. Note that
			 * here, the old retained page is always write-back
			 * cached.
			 */
			ttm_pool_split_for_swap(restore->pool, p);
			copy_highpage(restore->alloced_page + i, p);
			__free_pages(p, 0);
		}

		restore->restored_pages++;
		first_page[i] = ttm_backup_handle_to_page_ptr(0);
	}

	if (ret) {
		if (!restore->restored_pages) {
			dma_addr_t *dma_addr = alloc->dma_addr ?
				&restore->first_dma : NULL;

			ttm_pool_unmap_and_free(restore->pool, restore->alloced_page,
						dma_addr, restore->page_caching);
			restore->restored_pages = nr;
		}
		return ret;
	}

	ttm_pool_allocated_page_commit(restore->alloced_page, restore->first_dma,
				       alloc, nr);
	if (restore->page_caching == alloc->tt_caching || PageHighMem(restore->alloced_page))
		alloc->caching_divide = alloc->pages;
	restore->snapshot_alloc = *alloc;
	restore->alloced_pages += nr;

	return 0;
}

/* If restoring, save information needed for ttm_pool_restore_commit(). */
static void
ttm_pool_page_allocated_restore(struct ttm_pool *pool, unsigned int order,
				struct page *p,
				enum ttm_caching page_caching,
				dma_addr_t first_dma,
				struct ttm_pool_tt_restore *restore,
				const struct ttm_pool_alloc_state *alloc)
{
	restore->pool = pool;
	restore->order = order;
	restore->restored_pages = 0;
	restore->page_caching = page_caching;
	restore->first_dma = first_dma;
	restore->alloced_page = p;
	restore->snapshot_alloc = *alloc;
}

/*
 * Called when we got a page, either from a pool or newly allocated.
 * If needed, DMA map the page and populate the dma-address array.
 * Populate the page address array.
 * If the caching is consistent, update any deferred caching. Otherwise
 * stage this page for an upcoming deferred caching update.
 */
static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
				   struct page *p, enum ttm_caching page_caching,
				   struct ttm_pool_alloc_state *alloc,
				   struct ttm_pool_tt_restore *restore)
{
	bool caching_consistent;
	dma_addr_t first_dma;
	int r = 0;

	caching_consistent = (page_caching == alloc->tt_caching) || PageHighMem(p);

	if (caching_consistent) {
		r = ttm_pool_apply_caching(alloc);
		if (r)
			return r;
	}

	if (alloc->dma_addr) {
		r = ttm_pool_map(pool, order, p, &first_dma);
		if (r)
			return r;
	}

	if (restore) {
		ttm_pool_page_allocated_restore(pool, order, p, page_caching,
						first_dma, restore, alloc);
	} else {
		ttm_pool_allocated_page_commit(p, first_dma, alloc, 1UL << order);

		if (caching_consistent)
			alloc->caching_divide = alloc->pages;
	}

	return 0;
}

/**
 * ttm_pool_free_range() - Free a range of TTM pages
 * @pool: The pool used for allocating.
 * @tt: The struct ttm_tt holding the page pointers.
 * @caching: The page caching mode used by the range.
 * @start_page: Index of the first page to free.
 * @end_page: Index of the last page to free + 1.
 *
 * During allocation the ttm_tt page-vector may be populated with ranges of
 * pages with different attributes if allocation hit an error without being
 * able to completely fulfill the allocation. This function can be used
 * to free these individual ranges.
 */
static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
				enum ttm_caching caching,
				pgoff_t start_page, pgoff_t end_page)
{
	struct page **pages = &tt->pages[start_page];
	struct ttm_backup *backup = tt->backup;
	pgoff_t i, nr;

	for (i = start_page; i < end_page; i += nr, pages += nr) {
		struct page *p = *pages;

		nr = 1;
		if (ttm_backup_page_ptr_is_handle(p)) {
			unsigned long handle = ttm_backup_page_ptr_to_handle(p);

			if (handle != 0)
				ttm_backup_drop(backup, handle);
		} else if (p) {
			dma_addr_t *dma_addr = tt->dma_address ?
				tt->dma_address + i : NULL;

			nr = ttm_pool_unmap_and_free(pool, p, dma_addr, caching);
		}
	}
}

static void ttm_pool_alloc_state_init(const struct ttm_tt *tt,
				      struct ttm_pool_alloc_state *alloc)
{
	alloc->pages = tt->pages;
	alloc->caching_divide = tt->pages;
	alloc->dma_addr = tt->dma_address;
	alloc->remaining_pages = tt->num_pages;
	alloc->tt_caching = tt->caching;
}

/*
 * Find a suitable allocation order based on highest desired order
 * and number of remaining pages
 */
static unsigned int ttm_pool_alloc_find_order(unsigned int highest,
					      const struct ttm_pool_alloc_state *alloc)
{
	return min_t(unsigned int, highest, __fls(alloc->remaining_pages));
}

static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
			    const struct ttm_operation_ctx *ctx,
			    struct ttm_pool_alloc_state *alloc,
			    struct ttm_pool_tt_restore *restore)
{
	enum ttm_caching page_caching;
	gfp_t gfp_flags = GFP_USER;
	pgoff_t caching_divide;
	unsigned int order;
	bool allow_pools;
	struct page *p;
	int r;

	WARN_ON(!alloc->remaining_pages || ttm_tt_is_populated(tt));
	WARN_ON(alloc->dma_addr && !pool->dev);

	if (tt->page_flags & TTM_TT_FLAG_ZERO_ALLOC)
		gfp_flags |= __GFP_ZERO;

	if (ctx->gfp_retry_mayfail)
		gfp_flags |= __GFP_RETRY_MAYFAIL;

	if (pool->use_dma32)
		gfp_flags |= GFP_DMA32;
	else
		gfp_flags |= GFP_HIGHUSER;

	page_caching = tt->caching;
	allow_pools = true;
	for (order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
	     alloc->remaining_pages;
	     order = ttm_pool_alloc_find_order(order, alloc)) {
		struct ttm_pool_type *pt;

		/* First, try to allocate a page from a pool if one exists. */
		p = NULL;
		pt = ttm_pool_select_type(pool, page_caching, order);
		if (pt && allow_pools)
			p = ttm_pool_type_take(pt);
		/*
		 * If that fails or previously failed, allocate from system.
		 * Note that this also disallows additional pool allocations using
		 * write-back cached pools of the same order. Consider removing
		 * that behaviour.
		 */
		if (!p) {
			page_caching = ttm_cached;
			allow_pools = false;
			p = ttm_pool_alloc_page(pool, gfp_flags, order);
		}
		/* If that fails, lower the order if possible and retry. */
		if (!p) {
			if (order) {
				--order;
				page_caching = tt->caching;
				allow_pools = true;
				continue;
			}
			r = -ENOMEM;
			goto error_free_all;
		}
		r = ttm_pool_page_allocated(pool, order, p, page_caching, alloc,
					    restore);
		if (r)
			goto error_free_page;

		if (ttm_pool_restore_valid(restore)) {
			r = ttm_pool_restore_commit(restore, tt->backup, ctx, alloc);
			if (r)
				goto error_free_all;
		}
	}

	r = ttm_pool_apply_caching(alloc);
	if (r)
		goto error_free_all;

	kfree(tt->restore);
	tt->restore = NULL;

	return 0;

error_free_page:
	ttm_pool_free_page(pool, page_caching, order, p);

error_free_all:
	if (tt->restore)
		return r;

	caching_divide = alloc->caching_divide - tt->pages;
	ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
	ttm_pool_free_range(pool, tt, ttm_cached, caching_divide,
			    tt->num_pages - alloc->remaining_pages);

	return r;
}

/**
 * ttm_pool_alloc - Fill a ttm_tt object
 *
 * @pool: ttm_pool to use
 * @tt: ttm_tt object to fill
 * @ctx: operation context
 *
 * Fill the ttm_tt object with pages and also make sure to DMA map them when
 * necessary.
 *
 * Returns: 0 on success, negative error code otherwise.
 */
int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
		   struct ttm_operation_ctx *ctx)
{
	struct ttm_pool_alloc_state alloc;

	if (WARN_ON(ttm_tt_is_backed_up(tt)))
		return -EINVAL;

	ttm_pool_alloc_state_init(tt, &alloc);

	return __ttm_pool_alloc(pool, tt, ctx, &alloc, NULL);
}
EXPORT_SYMBOL(ttm_pool_alloc);

/**
 * ttm_pool_restore_and_alloc - Fill a ttm_tt, restoring previously backed-up
 * content.
 *
 * @pool: ttm_pool to use
 * @tt: ttm_tt object to fill
 * @ctx: operation context
 *
 * Fill the ttm_tt object with pages and also make sure to DMA map them when
 * necessary. Read in backed-up content.
 *
 * Returns: 0 on success, negative error code otherwise.
 */
int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
			       const struct ttm_operation_ctx *ctx)
{
	struct ttm_pool_alloc_state alloc;

	if (WARN_ON(!ttm_tt_is_backed_up(tt)))
		return -EINVAL;

	if (!tt->restore) {
		gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;

		ttm_pool_alloc_state_init(tt, &alloc);
		if (ctx->gfp_retry_mayfail)
			gfp |= __GFP_RETRY_MAYFAIL;

		tt->restore = kzalloc(sizeof(*tt->restore), gfp);
		if (!tt->restore)
			return -ENOMEM;

		tt->restore->snapshot_alloc = alloc;
		tt->restore->pool = pool;
		tt->restore->restored_pages = 1;
	} else {
		struct ttm_pool_tt_restore *restore = tt->restore;
		int ret;

		alloc = restore->snapshot_alloc;
		if (ttm_pool_restore_valid(tt->restore)) {
			ret = ttm_pool_restore_commit(restore, tt->backup, ctx, &alloc);
			if (ret)
				return ret;
		}
		if (!alloc.remaining_pages)
			return 0;
	}

	return __ttm_pool_alloc(pool, tt, ctx, &alloc, tt->restore);
}
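
/*
 * Restore usage sketch (illustrative only, not part of the pool code; "pool"
 * and "tt" are assumed to exist in the caller): a failed or interrupted
 * restore keeps its progress in tt->restore, so the caller may simply call
 * ttm_pool_restore_and_alloc() again later with the same struct ttm_tt and
 * restoration resumes where it stopped:
 *
 *	struct ttm_operation_ctx ctx = { .interruptible = true };
 *	int err;
 *
 *	err = ttm_pool_restore_and_alloc(pool, tt, &ctx);
 *	if (err)
 *		return err;	// e.g. -EINTR or -ENOMEM, safe to retry later
 */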

/**
 * ttm_pool_free - Free the backing pages from a ttm_tt object
 *
 * @pool: Pool to give pages back to.
 * @tt: ttm_tt object to unpopulate
 *
 * Give the backing pages back to a pool or free them
 */
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
{
	ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages);

	while (atomic_long_read(&allocated_pages) > page_pool_size)
		ttm_pool_shrink();
}
EXPORT_SYMBOL(ttm_pool_free);

/**
 * ttm_pool_drop_backed_up() - Release content of a swapped-out struct ttm_tt
 * @tt: The struct ttm_tt.
 *
 * Release handles with associated content or any remaining pages of
 * a backed-up struct ttm_tt.
 */
void ttm_pool_drop_backed_up(struct ttm_tt *tt)
{
	struct ttm_pool_tt_restore *restore;
	pgoff_t start_page = 0;

	WARN_ON(!ttm_tt_is_backed_up(tt));

	restore = tt->restore;

	/*
	 * Unmap and free any uncommitted restore page. Any tt page-array
	 * backup entries that have already been read back have already
	 * been cleared.
	 */
	if (ttm_pool_restore_valid(restore)) {
		dma_addr_t *dma_addr = tt->dma_address ? &restore->first_dma : NULL;

		ttm_pool_unmap_and_free(restore->pool, restore->alloced_page,
					dma_addr, restore->page_caching);
		restore->restored_pages = 1UL << restore->order;
	}

	/*
	 * If a restore is ongoing, part of the tt pages may have a
	 * caching different than writeback.
	 */
	if (restore) {
		pgoff_t mid = restore->snapshot_alloc.caching_divide - tt->pages;

		start_page = restore->alloced_pages;
		WARN_ON(mid > start_page);
		/* Pages that might be dma-mapped and non-cached */
		ttm_pool_free_range(restore->pool, tt, tt->caching,
				    0, mid);
		/* Pages that might be dma-mapped but cached */
		ttm_pool_free_range(restore->pool, tt, ttm_cached,
				    mid, restore->alloced_pages);
		kfree(restore);
		tt->restore = NULL;
	}

	ttm_pool_free_range(NULL, tt, ttm_cached, start_page, tt->num_pages);
}
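
/*
 * Backup usage sketch (illustrative only; the struct ttm_backup_flags values
 * shown are just one possible choice): a populated ttm_tt from a pool without
 * use_dma_alloc can be swapped out with ttm_pool_backup() and its content
 * later either read back or discarded:
 *
 *	struct ttm_backup_flags flags = { .writeback = true };
 *	long backed_up;
 *
 *	backed_up = ttm_pool_backup(pool, tt, &flags);
 *	if (backed_up < 0)
 *		return backed_up;	// nothing was backed up
 *
 *	// later, either read the content back:
 *	err = ttm_pool_restore_and_alloc(pool, tt, &ctx);
 *	// or drop it:
 *	ttm_pool_drop_backed_up(tt);
 */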

/**
 * ttm_pool_backup() - Back up or purge a struct ttm_tt
 * @pool: The pool used when allocating the struct ttm_tt.
 * @tt: The struct ttm_tt.
 * @flags: Flags to govern the backup behaviour.
 *
 * Back up or purge a struct ttm_tt. If @flags->purge is true, then
 * all pages will be freed directly to the system rather than to the pool
 * they were allocated from, making the function behave similarly to
 * ttm_pool_free(). If @flags->purge is false the pages will be backed up
 * instead, exchanged for handles.
 * A subsequent call to ttm_pool_restore_and_alloc() will then read back the
 * content and a subsequent call to ttm_pool_drop_backed_up() will drop it.
 * If backup of a page fails for whatever reason, @tt will still be
 * partially backed up, retaining those pages for which backup failed.
 * In that case, this function can be retried, possibly after freeing up
 * memory resources.
 *
 * Return: Number of pages actually backed up or freed, or negative
 * error code on error.
 */
long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
		     const struct ttm_backup_flags *flags)
{
	struct ttm_backup *backup = tt->backup;
	struct page *page;
	unsigned long handle;
	gfp_t alloc_gfp;
	gfp_t gfp;
	int ret = 0;
	pgoff_t shrunken = 0;
	pgoff_t i, num_pages;

	if (WARN_ON(ttm_tt_is_backed_up(tt)))
		return -EINVAL;

	if ((!ttm_backup_bytes_avail() && !flags->purge) ||
	    pool->use_dma_alloc || ttm_tt_is_backed_up(tt))
		return -EBUSY;

#ifdef CONFIG_X86
	/* Anything returned to the system needs to be cached. */
	if (tt->caching != ttm_cached)
		set_pages_array_wb(tt->pages, tt->num_pages);
#endif

	if (tt->dma_address || flags->purge) {
		for (i = 0; i < tt->num_pages; i += num_pages) {
			unsigned int order;

			page = tt->pages[i];
			if (unlikely(!page)) {
				num_pages = 1;
				continue;
			}

			order = ttm_pool_page_order(pool, page);
			num_pages = 1UL << order;
			if (tt->dma_address)
				ttm_pool_unmap(pool, tt->dma_address[i],
					       num_pages);
			if (flags->purge) {
				shrunken += num_pages;
				page->private = 0;
				__free_pages(page, order);
				memset(tt->pages + i, 0,
				       num_pages * sizeof(*tt->pages));
			}
		}
	}

	if (flags->purge)
		return shrunken;

	if (pool->use_dma32)
		gfp = GFP_DMA32;
	else
		gfp = GFP_HIGHUSER;

	alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;

	num_pages = tt->num_pages;

	/* Simulate fault injection by shrinking only half of the pages. */
	if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
		num_pages = DIV_ROUND_UP(num_pages, 2);

	for (i = 0; i < num_pages; ++i) {
		s64 shandle;

		page = tt->pages[i];
		if (unlikely(!page))
			continue;

		ttm_pool_split_for_swap(pool, page);

		shandle = ttm_backup_backup_page(backup, page, flags->writeback, i,
						 gfp, alloc_gfp);
		if (shandle < 0) {
			/* We allow partially shrunken tts */
			ret = shandle;
			break;
		}
		handle = shandle;
		tt->pages[i] = ttm_backup_handle_to_page_ptr(handle);
		put_page(page);
		shrunken++;
	}

	return shrunken ? shrunken : ret;
}

/**
 * ttm_pool_init - Initialize a pool
 *
 * @pool: the pool to initialize
 * @dev: device for DMA allocations and mappings
 * @nid: NUMA node to use for allocations
 * @use_dma_alloc: true if coherent DMA alloc should be used
 * @use_dma32: true if GFP_DMA32 should be used
 *
 * Initialize the pool and its pool types.
 */
void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
		   int nid, bool use_dma_alloc, bool use_dma32)
{
	unsigned int i, j;

	WARN_ON(!dev && use_dma_alloc);

	pool->dev = dev;
	pool->nid = nid;
	pool->use_dma_alloc = use_dma_alloc;
	pool->use_dma32 = use_dma32;

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
			struct ttm_pool_type *pt;

			/* Initialize only pool types which are actually used */
			pt = ttm_pool_select_type(pool, i, j);
			if (pt != &pool->caching[i].orders[j])
				continue;

			ttm_pool_type_init(pt, pool, i, j);
		}
	}
}
EXPORT_SYMBOL(ttm_pool_init);
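
/*
 * Pool lifecycle sketch (illustrative only; "dev", "tt" and "err" are assumed
 * to exist in the caller): a pool is typically initialized once, used to
 * populate and unpopulate ttm_tt objects, and torn down again:
 *
 *	struct ttm_pool pool;
 *	struct ttm_operation_ctx ctx = { .interruptible = true };
 *
 *	ttm_pool_init(&pool, dev, NUMA_NO_NODE, false, false);
 *
 *	err = ttm_pool_alloc(&pool, tt, &ctx);	// populate tt->pages[]
 *	...
 *	ttm_pool_free(&pool, tt);		// give the pages back
 *
 *	ttm_pool_fini(&pool);			// on teardown
 */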

/**
 * ttm_pool_synchronize_shrinkers - Wait for all running shrinkers to complete.
 *
 * This is useful to guarantee that all shrinker invocations have seen an
 * update, before freeing memory, similar to rcu.
 */
static void ttm_pool_synchronize_shrinkers(void)
{
	down_write(&pool_shrink_rwsem);
	up_write(&pool_shrink_rwsem);
}

/**
 * ttm_pool_fini - Cleanup a pool
 *
 * @pool: the pool to clean up
 *
 * Free all pages in the pool and unregister the types from the global
 * shrinker.
 */
void ttm_pool_fini(struct ttm_pool *pool)
{
	unsigned int i, j;

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
			struct ttm_pool_type *pt;

			pt = ttm_pool_select_type(pool, i, j);
			if (pt != &pool->caching[i].orders[j])
				continue;

			ttm_pool_type_fini(pt);
		}
	}

	/* We removed the pool types from the LRU, but we need to also make sure
	 * that no shrinker is concurrently freeing pages from the pool.
	 */
	ttm_pool_synchronize_shrinkers();
}
EXPORT_SYMBOL(ttm_pool_fini);

/* As long as pages are available make sure to release at least one */
static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	unsigned long num_freed = 0;

	do
		num_freed += ttm_pool_shrink();
	while (!num_freed && atomic_long_read(&allocated_pages));

	return num_freed;
}

/* Return the number of pages available or SHRINK_EMPTY if we have none */
static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
					     struct shrink_control *sc)
{
	unsigned long num_pages = atomic_long_read(&allocated_pages);

	return num_pages ? num_pages : SHRINK_EMPTY;
}

#ifdef CONFIG_DEBUG_FS
/* Count the number of pages available in a pool_type */
static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
{
	unsigned int count = 0;
	struct page *p;

	spin_lock(&pt->lock);
	/* Only used for debugfs, the overhead doesn't matter */
	list_for_each_entry(p, &pt->pages, lru)
		++count;
	spin_unlock(&pt->lock);

	return count;
}

/* Print a nice header for the order */
static void ttm_pool_debugfs_header(struct seq_file *m)
{
	unsigned int i;

	seq_puts(m, "\t ");
	for (i = 0; i < NR_PAGE_ORDERS; ++i)
		seq_printf(m, " ---%2u---", i);
	seq_puts(m, "\n");
}

/* Dump information about the different pool types */
static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
				    struct seq_file *m)
{
	unsigned int i;

	for (i = 0; i < NR_PAGE_ORDERS; ++i)
		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
	seq_puts(m, "\n");
}

/* Dump the total amount of allocated pages */
static void ttm_pool_debugfs_footer(struct seq_file *m)
{
	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
		   atomic_long_read(&allocated_pages), page_pool_size);
}

/* Dump the information for the global pools */
static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
{
	ttm_pool_debugfs_header(m);

	spin_lock(&shrinker_lock);
	seq_puts(m, "wc\t:");
	ttm_pool_debugfs_orders(global_write_combined, m);
	seq_puts(m, "uc\t:");
	ttm_pool_debugfs_orders(global_uncached, m);
	seq_puts(m, "wc 32\t:");
	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
	seq_puts(m, "uc 32\t:");
	ttm_pool_debugfs_orders(global_dma32_uncached, m);
	spin_unlock(&shrinker_lock);

	ttm_pool_debugfs_footer(m);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals);

/**
 * ttm_pool_debugfs - Debugfs dump function for a pool
 *
 * @pool: the pool to dump the information for
 * @m: seq_file to dump to
 *
 * Make a debugfs dump with the per pool and global information.
 */
int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
{
	unsigned int i;

	if (!pool->use_dma_alloc) {
		seq_puts(m, "unused\n");
		return 0;
	}

	ttm_pool_debugfs_header(m);

	spin_lock(&shrinker_lock);
	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
		seq_puts(m, "DMA ");
		switch (i) {
		case ttm_cached:
			seq_puts(m, "\t:");
			break;
		case ttm_write_combined:
			seq_puts(m, "wc\t:");
			break;
		case ttm_uncached:
			seq_puts(m, "uc\t:");
			break;
		}
		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
	}
	spin_unlock(&shrinker_lock);

	ttm_pool_debugfs_footer(m);
	return 0;
}
EXPORT_SYMBOL(ttm_pool_debugfs);

/* Test the shrinker functions and dump the result */
static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data)
{
	struct shrink_control sc = { .gfp_mask = GFP_NOFS };

	fs_reclaim_acquire(GFP_KERNEL);
	seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(mm_shrinker, &sc),
		   ttm_pool_shrinker_scan(mm_shrinker, &sc));
	fs_reclaim_release(GFP_KERNEL);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);

#endif

/**
 * ttm_pool_mgr_init - Initialize globals
 *
 * @num_pages: default number of pages
 *
 * Initialize the global locks and lists for the MM shrinker.
 */
int ttm_pool_mgr_init(unsigned long num_pages)
{
	unsigned int i;

	if (!page_pool_size)
		page_pool_size = num_pages;

	spin_lock_init(&shrinker_lock);
	INIT_LIST_HEAD(&shrinker_list);

	for (i = 0; i < NR_PAGE_ORDERS; ++i) {
		ttm_pool_type_init(&global_write_combined[i], NULL,
				   ttm_write_combined, i);
		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);

		ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
				   ttm_write_combined, i);
		ttm_pool_type_init(&global_dma32_uncached[i], NULL,
				   ttm_uncached, i);
	}

#ifdef CONFIG_DEBUG_FS
	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
			    &ttm_pool_debugfs_globals_fops);
	debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
			    &ttm_pool_debugfs_shrink_fops);
#ifdef CONFIG_FAULT_INJECTION
	fault_create_debugfs_attr("backup_fault_inject", ttm_debugfs_root,
				  &backup_fault_inject);
#endif
#endif

	mm_shrinker = shrinker_alloc(0, "drm-ttm_pool");
	if (!mm_shrinker)
		return -ENOMEM;

	mm_shrinker->count_objects = ttm_pool_shrinker_count;
	mm_shrinker->scan_objects = ttm_pool_shrinker_scan;
	mm_shrinker->seeks = 1;

	shrinker_register(mm_shrinker);

	return 0;
}

/**
 * ttm_pool_mgr_fini - Finalize globals
 *
 * Cleanup the global pools and unregister the MM shrinker.
 */
void ttm_pool_mgr_fini(void)
{
	unsigned int i;

	for (i = 0; i < NR_PAGE_ORDERS; ++i) {
		ttm_pool_type_fini(&global_write_combined[i]);
		ttm_pool_type_fini(&global_uncached[i]);

		ttm_pool_type_fini(&global_dma32_write_combined[i]);
		ttm_pool_type_fini(&global_dma32_uncached[i]);
	}

	shrinker_free(mm_shrinker);
	WARN_ON(!list_empty(&shrinker_list));
}