1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * DAMON Primitives for The Physical Address Space 4 * 5 * Author: SeongJae Park <sj@kernel.org> 6 */ 7 8 #define pr_fmt(fmt) "damon-pa: " fmt 9 10 #include <linux/mmu_notifier.h> 11 #include <linux/page_idle.h> 12 #include <linux/pagemap.h> 13 #include <linux/rmap.h> 14 #include <linux/swap.h> 15 #include <linux/memory-tiers.h> 16 #include <linux/migrate.h> 17 #include <linux/mm_inline.h> 18 19 #include "../internal.h" 20 #include "ops-common.h" 21 22 static bool damon_folio_mkold_one(struct folio *folio, 23 struct vm_area_struct *vma, unsigned long addr, void *arg) 24 { 25 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); 26 27 while (page_vma_mapped_walk(&pvmw)) { 28 addr = pvmw.address; 29 if (pvmw.pte) 30 damon_ptep_mkold(pvmw.pte, vma, addr); 31 else 32 damon_pmdp_mkold(pvmw.pmd, vma, addr); 33 } 34 return true; 35 } 36 37 static void damon_folio_mkold(struct folio *folio) 38 { 39 struct rmap_walk_control rwc = { 40 .rmap_one = damon_folio_mkold_one, 41 .anon_lock = folio_lock_anon_vma_read, 42 }; 43 bool need_lock; 44 45 if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { 46 folio_set_idle(folio); 47 return; 48 } 49 50 need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); 51 if (need_lock && !folio_trylock(folio)) 52 return; 53 54 rmap_walk(folio, &rwc); 55 56 if (need_lock) 57 folio_unlock(folio); 58 59 } 60 61 static void damon_pa_mkold(unsigned long paddr) 62 { 63 struct folio *folio = damon_get_folio(PHYS_PFN(paddr)); 64 65 if (!folio) 66 return; 67 68 damon_folio_mkold(folio); 69 folio_put(folio); 70 } 71 72 static void __damon_pa_prepare_access_check(struct damon_region *r) 73 { 74 r->sampling_addr = damon_rand(r->ar.start, r->ar.end); 75 76 damon_pa_mkold(r->sampling_addr); 77 } 78 79 static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) 80 { 81 struct damon_target *t; 82 struct damon_region *r; 83 84 damon_for_each_target(t, ctx) { 85 damon_for_each_region(r, t) 86 __damon_pa_prepare_access_check(r); 87 } 88 } 89 90 static bool damon_folio_young_one(struct folio *folio, 91 struct vm_area_struct *vma, unsigned long addr, void *arg) 92 { 93 bool *accessed = arg; 94 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); 95 pte_t pte; 96 97 *accessed = false; 98 while (page_vma_mapped_walk(&pvmw)) { 99 addr = pvmw.address; 100 if (pvmw.pte) { 101 pte = ptep_get(pvmw.pte); 102 103 /* 104 * PFN swap PTEs, such as device-exclusive ones, that 105 * actually map pages are "old" from a CPU perspective. 106 * The MMU notifier takes care of any device aspects. 107 */ 108 *accessed = (pte_present(pte) && pte_young(pte)) || 109 !folio_test_idle(folio) || 110 mmu_notifier_test_young(vma->vm_mm, addr); 111 } else { 112 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 113 *accessed = pmd_young(pmdp_get(pvmw.pmd)) || 114 !folio_test_idle(folio) || 115 mmu_notifier_test_young(vma->vm_mm, addr); 116 #else 117 WARN_ON_ONCE(1); 118 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 119 } 120 if (*accessed) { 121 page_vma_mapped_walk_done(&pvmw); 122 break; 123 } 124 } 125 126 /* If accessed, stop walking */ 127 return *accessed == false; 128 } 129 130 static bool damon_folio_young(struct folio *folio) 131 { 132 bool accessed = false; 133 struct rmap_walk_control rwc = { 134 .arg = &accessed, 135 .rmap_one = damon_folio_young_one, 136 .anon_lock = folio_lock_anon_vma_read, 137 }; 138 bool need_lock; 139 140 if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { 141 if (folio_test_idle(folio)) 142 return false; 143 else 144 return true; 145 } 146 147 need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); 148 if (need_lock && !folio_trylock(folio)) 149 return false; 150 151 rmap_walk(folio, &rwc); 152 153 if (need_lock) 154 folio_unlock(folio); 155 156 return accessed; 157 } 158 159 static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz) 160 { 161 struct folio *folio = damon_get_folio(PHYS_PFN(paddr)); 162 bool accessed; 163 164 if (!folio) 165 return false; 166 167 accessed = damon_folio_young(folio); 168 *folio_sz = folio_size(folio); 169 folio_put(folio); 170 return accessed; 171 } 172 173 static void __damon_pa_check_access(struct damon_region *r, 174 struct damon_attrs *attrs) 175 { 176 static unsigned long last_addr; 177 static unsigned long last_folio_sz = PAGE_SIZE; 178 static bool last_accessed; 179 180 /* If the region is in the last checked page, reuse the result */ 181 if (ALIGN_DOWN(last_addr, last_folio_sz) == 182 ALIGN_DOWN(r->sampling_addr, last_folio_sz)) { 183 damon_update_region_access_rate(r, last_accessed, attrs); 184 return; 185 } 186 187 last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz); 188 damon_update_region_access_rate(r, last_accessed, attrs); 189 190 last_addr = r->sampling_addr; 191 } 192 193 static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) 194 { 195 struct damon_target *t; 196 struct damon_region *r; 197 unsigned int max_nr_accesses = 0; 198 199 damon_for_each_target(t, ctx) { 200 damon_for_each_region(r, t) { 201 __damon_pa_check_access(r, &ctx->attrs); 202 max_nr_accesses = max(r->nr_accesses, max_nr_accesses); 203 } 204 } 205 206 return max_nr_accesses; 207 } 208 209 static bool damos_pa_filter_match(struct damos_filter *filter, 210 struct folio *folio) 211 { 212 bool matched = false; 213 struct mem_cgroup *memcg; 214 size_t folio_sz; 215 216 switch (filter->type) { 217 case DAMOS_FILTER_TYPE_ANON: 218 matched = folio_test_anon(folio); 219 break; 220 case DAMOS_FILTER_TYPE_ACTIVE: 221 matched = folio_test_active(folio); 222 break; 223 case DAMOS_FILTER_TYPE_MEMCG: 224 rcu_read_lock(); 225 memcg = folio_memcg_check(folio); 226 if (!memcg) 227 matched = false; 228 else 229 matched = filter->memcg_id == mem_cgroup_id(memcg); 230 rcu_read_unlock(); 231 break; 232 case DAMOS_FILTER_TYPE_YOUNG: 233 matched = damon_folio_young(folio); 234 if (matched) 235 damon_folio_mkold(folio); 236 break; 237 case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: 238 folio_sz = folio_size(folio); 239 matched = filter->sz_range.min <= folio_sz && 240 folio_sz <= filter->sz_range.max; 241 break; 242 case DAMOS_FILTER_TYPE_UNMAPPED: 243 matched = !folio_mapped(folio) || !folio_raw_mapping(folio); 244 break; 245 default: 246 break; 247 } 248 249 return matched == filter->matching; 250 } 251 252 /* 253 * damos_pa_filter_out - Return true if the page should be filtered out. 254 */ 255 static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio) 256 { 257 struct damos_filter *filter; 258 259 if (scheme->core_filters_allowed) 260 return false; 261 262 damos_for_each_ops_filter(filter, scheme) { 263 if (damos_pa_filter_match(filter, folio)) 264 return !filter->allow; 265 } 266 return scheme->ops_filters_default_reject; 267 } 268 269 static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s) 270 { 271 if (!folio) 272 return true; 273 if (folio == s->last_applied) { 274 folio_put(folio); 275 return true; 276 } 277 return false; 278 } 279 280 static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s, 281 unsigned long *sz_filter_passed) 282 { 283 unsigned long addr, applied; 284 LIST_HEAD(folio_list); 285 bool install_young_filter = true; 286 struct damos_filter *filter; 287 struct folio *folio; 288 289 /* check access in page level again by default */ 290 damos_for_each_ops_filter(filter, s) { 291 if (filter->type == DAMOS_FILTER_TYPE_YOUNG) { 292 install_young_filter = false; 293 break; 294 } 295 } 296 if (install_young_filter) { 297 filter = damos_new_filter( 298 DAMOS_FILTER_TYPE_YOUNG, true, false); 299 if (!filter) 300 return 0; 301 damos_add_filter(s, filter); 302 } 303 304 addr = r->ar.start; 305 while (addr < r->ar.end) { 306 folio = damon_get_folio(PHYS_PFN(addr)); 307 if (damon_pa_invalid_damos_folio(folio, s)) { 308 addr += PAGE_SIZE; 309 continue; 310 } 311 312 if (damos_pa_filter_out(s, folio)) 313 goto put_folio; 314 else 315 *sz_filter_passed += folio_size(folio); 316 317 folio_clear_referenced(folio); 318 folio_test_clear_young(folio); 319 if (!folio_isolate_lru(folio)) 320 goto put_folio; 321 if (folio_test_unevictable(folio)) 322 folio_putback_lru(folio); 323 else 324 list_add(&folio->lru, &folio_list); 325 put_folio: 326 addr += folio_size(folio); 327 folio_put(folio); 328 } 329 if (install_young_filter) 330 damos_destroy_filter(filter); 331 applied = reclaim_pages(&folio_list); 332 cond_resched(); 333 s->last_applied = folio; 334 return applied * PAGE_SIZE; 335 } 336 337 static inline unsigned long damon_pa_mark_accessed_or_deactivate( 338 struct damon_region *r, struct damos *s, bool mark_accessed, 339 unsigned long *sz_filter_passed) 340 { 341 unsigned long addr, applied = 0; 342 struct folio *folio; 343 344 addr = r->ar.start; 345 while (addr < r->ar.end) { 346 folio = damon_get_folio(PHYS_PFN(addr)); 347 if (damon_pa_invalid_damos_folio(folio, s)) { 348 addr += PAGE_SIZE; 349 continue; 350 } 351 352 if (damos_pa_filter_out(s, folio)) 353 goto put_folio; 354 else 355 *sz_filter_passed += folio_size(folio); 356 357 if (mark_accessed) 358 folio_mark_accessed(folio); 359 else 360 folio_deactivate(folio); 361 applied += folio_nr_pages(folio); 362 put_folio: 363 addr += folio_size(folio); 364 folio_put(folio); 365 } 366 s->last_applied = folio; 367 return applied * PAGE_SIZE; 368 } 369 370 static unsigned long damon_pa_mark_accessed(struct damon_region *r, 371 struct damos *s, unsigned long *sz_filter_passed) 372 { 373 return damon_pa_mark_accessed_or_deactivate(r, s, true, 374 sz_filter_passed); 375 } 376 377 static unsigned long damon_pa_deactivate_pages(struct damon_region *r, 378 struct damos *s, unsigned long *sz_filter_passed) 379 { 380 return damon_pa_mark_accessed_or_deactivate(r, s, false, 381 sz_filter_passed); 382 } 383 384 static unsigned int __damon_pa_migrate_folio_list( 385 struct list_head *migrate_folios, struct pglist_data *pgdat, 386 int target_nid) 387 { 388 unsigned int nr_succeeded = 0; 389 nodemask_t allowed_mask = NODE_MASK_NONE; 390 struct migration_target_control mtc = { 391 /* 392 * Allocate from 'node', or fail quickly and quietly. 393 * When this happens, 'page' will likely just be discarded 394 * instead of migrated. 395 */ 396 .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | 397 __GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT, 398 .nid = target_nid, 399 .nmask = &allowed_mask 400 }; 401 402 if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE) 403 return 0; 404 405 if (list_empty(migrate_folios)) 406 return 0; 407 408 /* Migration ignores all cpuset and mempolicy settings */ 409 migrate_pages(migrate_folios, alloc_migrate_folio, NULL, 410 (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON, 411 &nr_succeeded); 412 413 return nr_succeeded; 414 } 415 416 static unsigned int damon_pa_migrate_folio_list(struct list_head *folio_list, 417 struct pglist_data *pgdat, 418 int target_nid) 419 { 420 unsigned int nr_migrated = 0; 421 struct folio *folio; 422 LIST_HEAD(ret_folios); 423 LIST_HEAD(migrate_folios); 424 425 while (!list_empty(folio_list)) { 426 struct folio *folio; 427 428 cond_resched(); 429 430 folio = lru_to_folio(folio_list); 431 list_del(&folio->lru); 432 433 if (!folio_trylock(folio)) 434 goto keep; 435 436 /* Relocate its contents to another node. */ 437 list_add(&folio->lru, &migrate_folios); 438 folio_unlock(folio); 439 continue; 440 keep: 441 list_add(&folio->lru, &ret_folios); 442 } 443 /* 'folio_list' is always empty here */ 444 445 /* Migrate folios selected for migration */ 446 nr_migrated += __damon_pa_migrate_folio_list( 447 &migrate_folios, pgdat, target_nid); 448 /* 449 * Folios that could not be migrated are still in @migrate_folios. Add 450 * those back on @folio_list 451 */ 452 if (!list_empty(&migrate_folios)) 453 list_splice_init(&migrate_folios, folio_list); 454 455 try_to_unmap_flush(); 456 457 list_splice(&ret_folios, folio_list); 458 459 while (!list_empty(folio_list)) { 460 folio = lru_to_folio(folio_list); 461 list_del(&folio->lru); 462 folio_putback_lru(folio); 463 } 464 465 return nr_migrated; 466 } 467 468 static unsigned long damon_pa_migrate_pages(struct list_head *folio_list, 469 int target_nid) 470 { 471 int nid; 472 unsigned long nr_migrated = 0; 473 LIST_HEAD(node_folio_list); 474 unsigned int noreclaim_flag; 475 476 if (list_empty(folio_list)) 477 return nr_migrated; 478 479 noreclaim_flag = memalloc_noreclaim_save(); 480 481 nid = folio_nid(lru_to_folio(folio_list)); 482 do { 483 struct folio *folio = lru_to_folio(folio_list); 484 485 if (nid == folio_nid(folio)) { 486 list_move(&folio->lru, &node_folio_list); 487 continue; 488 } 489 490 nr_migrated += damon_pa_migrate_folio_list(&node_folio_list, 491 NODE_DATA(nid), 492 target_nid); 493 nid = folio_nid(lru_to_folio(folio_list)); 494 } while (!list_empty(folio_list)); 495 496 nr_migrated += damon_pa_migrate_folio_list(&node_folio_list, 497 NODE_DATA(nid), 498 target_nid); 499 500 memalloc_noreclaim_restore(noreclaim_flag); 501 502 return nr_migrated; 503 } 504 505 static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s, 506 unsigned long *sz_filter_passed) 507 { 508 unsigned long addr, applied; 509 LIST_HEAD(folio_list); 510 struct folio *folio; 511 512 addr = r->ar.start; 513 while (addr < r->ar.end) { 514 folio = damon_get_folio(PHYS_PFN(addr)); 515 if (damon_pa_invalid_damos_folio(folio, s)) { 516 addr += PAGE_SIZE; 517 continue; 518 } 519 520 if (damos_pa_filter_out(s, folio)) 521 goto put_folio; 522 else 523 *sz_filter_passed += folio_size(folio); 524 525 if (!folio_isolate_lru(folio)) 526 goto put_folio; 527 list_add(&folio->lru, &folio_list); 528 put_folio: 529 addr += folio_size(folio); 530 folio_put(folio); 531 } 532 applied = damon_pa_migrate_pages(&folio_list, s->target_nid); 533 cond_resched(); 534 s->last_applied = folio; 535 return applied * PAGE_SIZE; 536 } 537 538 static bool damon_pa_scheme_has_filter(struct damos *s) 539 { 540 struct damos_filter *f; 541 542 damos_for_each_ops_filter(f, s) 543 return true; 544 return false; 545 } 546 547 static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s, 548 unsigned long *sz_filter_passed) 549 { 550 unsigned long addr; 551 struct folio *folio; 552 553 if (!damon_pa_scheme_has_filter(s)) 554 return 0; 555 556 addr = r->ar.start; 557 while (addr < r->ar.end) { 558 folio = damon_get_folio(PHYS_PFN(addr)); 559 if (damon_pa_invalid_damos_folio(folio, s)) { 560 addr += PAGE_SIZE; 561 continue; 562 } 563 564 if (!damos_pa_filter_out(s, folio)) 565 *sz_filter_passed += folio_size(folio); 566 addr += folio_size(folio); 567 folio_put(folio); 568 } 569 s->last_applied = folio; 570 return 0; 571 } 572 573 static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx, 574 struct damon_target *t, struct damon_region *r, 575 struct damos *scheme, unsigned long *sz_filter_passed) 576 { 577 switch (scheme->action) { 578 case DAMOS_PAGEOUT: 579 return damon_pa_pageout(r, scheme, sz_filter_passed); 580 case DAMOS_LRU_PRIO: 581 return damon_pa_mark_accessed(r, scheme, sz_filter_passed); 582 case DAMOS_LRU_DEPRIO: 583 return damon_pa_deactivate_pages(r, scheme, sz_filter_passed); 584 case DAMOS_MIGRATE_HOT: 585 case DAMOS_MIGRATE_COLD: 586 return damon_pa_migrate(r, scheme, sz_filter_passed); 587 case DAMOS_STAT: 588 return damon_pa_stat(r, scheme, sz_filter_passed); 589 default: 590 /* DAMOS actions that not yet supported by 'paddr'. */ 591 break; 592 } 593 return 0; 594 } 595 596 static int damon_pa_scheme_score(struct damon_ctx *context, 597 struct damon_target *t, struct damon_region *r, 598 struct damos *scheme) 599 { 600 switch (scheme->action) { 601 case DAMOS_PAGEOUT: 602 return damon_cold_score(context, r, scheme); 603 case DAMOS_LRU_PRIO: 604 return damon_hot_score(context, r, scheme); 605 case DAMOS_LRU_DEPRIO: 606 return damon_cold_score(context, r, scheme); 607 case DAMOS_MIGRATE_HOT: 608 return damon_hot_score(context, r, scheme); 609 case DAMOS_MIGRATE_COLD: 610 return damon_cold_score(context, r, scheme); 611 default: 612 break; 613 } 614 615 return DAMOS_MAX_SCORE; 616 } 617 618 static int __init damon_pa_initcall(void) 619 { 620 struct damon_operations ops = { 621 .id = DAMON_OPS_PADDR, 622 .init = NULL, 623 .update = NULL, 624 .prepare_access_checks = damon_pa_prepare_access_checks, 625 .check_accesses = damon_pa_check_accesses, 626 .target_valid = NULL, 627 .cleanup = NULL, 628 .apply_scheme = damon_pa_apply_scheme, 629 .get_scheme_score = damon_pa_scheme_score, 630 }; 631 632 return damon_register_ops(&ops); 633 }; 634 635 subsys_initcall(damon_pa_initcall); 636