// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
/*
 * All memcg-aware list_lrus in the system, so they can be walked when a
 * cgroup's objects are reparented.  Protected by list_lrus_mutex.
 */
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

/* Make a memcg-aware lru visible to memcg_reparent_list_lrus(). */
static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

/*
 * Look up the per-node list for @idx (a kmem cgroup id).  A negative @idx,
 * or a non-memcg-aware lru, maps to the global per-node list.  May return
 * NULL when the corresponding list_lru_memcg is not (or no longer, after
 * reparenting) present in the xarray; callers must handle that.
 */
static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

/*
 * Acquire l->lock, but fail if the list has been marked dead (nr_items ==
 * LONG_MIN, set under this same lock by memcg_reparent_list_lru_one()), in
 * which case the lock is dropped again and false is returned.
 */
static inline bool lock_list_lru(struct list_lru_one *l, bool irq)
{
	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);
	if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) {
		if (irq)
			spin_unlock_irq(&l->lock);
		else
			spin_unlock(&l->lock);
		return false;
	}
	return true;
}

/*
 * Find and lock the list_lru_one for (@lru, @nid, @memcg).  Returns the
 * locked list, with the RCU read section already exited.  If the memcg's
 * list is missing or dead (racing with reparenting):
 *  - with @skip_empty, return NULL so the caller can bail out;
 *  - otherwise retry with the parent memcg, which is where reparenting
 *    moves the items.  RCU protects the xarray lookup across retries.
 */
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l;

	rcu_read_lock();
again:
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	if (likely(l) && lock_list_lru(l, irq)) {
		rcu_read_unlock();
		return l;
	}
	/*
	 * Caller may simply bail out if raced with reparenting or
	 * may iterate through the list_lru and expect empty slots.
	 */
	if (skip_empty) {
		rcu_read_unlock();
		return NULL;
	}
	/* Only a dying memcg loses its list; walk up and retry. */
	VM_WARN_ON(!css_is_dying(&memcg->css));
	memcg = parent_mem_cgroup(memcg);
	goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#else
/* !CONFIG_MEMCG: a single per-node list, no global registration needed. */
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

/* Without memcg there is no reparenting, so locking can never fail here. */
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l = &lru->node[nid].lru;

	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);

	return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	/* An item whose list_head is non-empty is already on a list. */
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		unlock_list_lru(l, false);
		/* Per-node count is kept outside the list lock, atomically. */
		atomic_long_inc(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

/*
 * Like list_lru_add(), but derives the node id and memcg from the object's
 * address.  The RCU read section keeps the looked-up memcg stable for the
 * duration of the add.
 */
bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_add(lru, item, nid, mem_cgroup_from_virt(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_add(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

/* The caller must ensure the memcg lifetime.
 */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	/* Only delete if the item is actually linked on a list. */
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		unlock_list_lru(l, false);
		atomic_long_dec(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

/*
 * Like list_lru_del(), but derives the node id and memcg from the object's
 * address, mirroring list_lru_add_obj().
 */
bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_del(lru, item, nid, mem_cgroup_from_virt(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_del(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

/*
 * Remove @item from @list.  Intended for walk callbacks, which run with the
 * lru list lock held by the walker (nr_items is updated non-atomically).
 * Note: the per-node atomic counter is the walker's responsibility.
 */
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

/* As list_lru_isolate(), but move @item onto the caller-supplied @head. */
void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

/*
 * Lockless count of one (node, memcg) list.  A dead list holds LONG_MIN in
 * nr_items, hence the clamp of negative values to 0 below.
 */
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

/* Total item count for a node, across all memcgs (kept atomically). */
unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

/*
 * Walk one (node, memcg) list, calling @isolate on each item under the lru
 * lock.  Decrements *nr_to_walk per visited item and returns the number of
 * items the callback reported as removed.  The callback may drop the lock
 * (LRU_RETRY/LRU_REMOVED_RETRY/LRU_STOP), forcing a restart or exit.
 */
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk, bool irq_off)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = NULL;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	/* skip_empty=true: bail out instead of chasing a reparented list. */
	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
	if (!l)
		return isolated;
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, cb_arg);
		switch (ret) {
		/*
		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
		 * lock. List traversal will have to restart from scratch.
		 */
		case LRU_RETRY:
			goto restart;
		case LRU_REMOVED_RETRY:
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			atomic_long_dec(&nlru->nr_items);
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_STOP:
			/* Callback already dropped the lock; skip unlock. */
			goto out;
		default:
			BUG();
		}
	}
	unlock_list_lru(l, irq_off);
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

/* As list_lru_walk_one(), but uses the irq-disabling lock variants. */
unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, true);
}

/*
 * Walk all lists of a node: the global list first, then (for memcg-aware
 * lrus) every per-memcg list present in the xarray, until *nr_to_walk is
 * exhausted.
 */
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			/* Pin the memcg so it outlives the walk below. */
			rcu_read_lock();
			memcg = mem_cgroup_from_private_id(index);
			if (!mem_cgroup_tryget(memcg)) {
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();
			isolated += __list_lru_walk_one(lru, nid, memcg,
							isolate, cb_arg,
							nr_to_walk, false);
			mem_cgroup_put(memcg);

			/* nr_to_walk is unsigned, so this tests for zero. */
			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void
init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	spin_lock_init(&l->lock);
	l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
	/* Let the owner group all of this lru's locks under one lockdep key. */
	if (lru->key)
		lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
/* Allocate and initialize one per-memcg array of per-node lists. */
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc_flex(*mlru, node, nr_node_ids, gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(lru, &mlru->node[nid]);

	return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

/* Free every remaining per-memcg list; the lru must already be unused. */
static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

/*
 * Splice @src (a dying memcg's list) into @dst_memcg's list for @nid, then
 * mark @src dead so lock_list_lru() refuses it from now on.  Lock order:
 * src first, then dst nested (SINGLE_DEPTH_NESTING keeps lockdep quiet).
 */
static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
					struct list_lru_one *src,
					struct mem_cgroup *dst_memcg)
{
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *dst;

	spin_lock_irq(&src->lock);
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

	list_splice_init(&src->list, &dst->list);
	if (src->nr_items) {
		WARN_ON(src->nr_items < 0);
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	}
	/* Mark the list_lru_one dead */
	src->nr_items = LONG_MIN;

	spin_unlock(&dst->lock);
	spin_unlock_irq(&src->lock);
}

/*
 * Move all of @memcg's items, in every registered memcg-aware lru and on
 * every node, onto @parent's lists.  Called when @memcg goes away.
 */
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct list_lru *lru;
	int i;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		struct list_lru_memcg *mlru;
		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

		/*
		 * Lock the Xarray to ensure no on going list_lru_memcg
		 * allocation and further allocation will see css_is_dying().
		 */
		xas_lock_irq(&xas);
		mlru = xas_store(&xas, NULL);
		xas_unlock_irq(&xas);
		if (!mlru)
			continue;

		/*
		 * With Xarray value set to NULL, holding the lru lock below
		 * prevents list_lru_{add,del,isolate} from touching the lru,
		 * safe to reparent.
		 */
		for_each_node(i)
			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

		/*
		 * Here all list_lrus corresponding to the cgroup are guaranteed
		 * to remain empty, we can safely free this lru, any further
		 * memcg_list_lru_alloc() call will simply bail out.
		 */
		kvfree_rcu(mlru, rcu);
	}
	mutex_unlock(&list_lrus_mutex);
}

/* True when @memcg already has (or never needs) a list_lru_memcg in @lru. */
static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

/*
 * Ensure @memcg and all of its not-yet-populated ancestors have a
 * list_lru_memcg in @lru, allocating with @gfp.  Returns 0 on success or a
 * negative errno (xarray store error / -ENOMEM).
 */
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	unsigned long flags;
	struct list_lru_memcg *mlru = NULL;
	struct mem_cgroup *pos, *parent;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	do {
		/*
		 * Keep finding the farthest parent that wasn't populated
		 * until found memcg itself.
		 */
		pos = memcg;
		parent = parent_mem_cgroup(pos);
		while (!memcg_list_lru_allocated(parent, lru)) {
			pos = parent;
			parent = parent_mem_cgroup(pos);
		}

		/* Reuse a leftover allocation from a lost race, if any. */
		if (!mlru) {
			mlru = memcg_init_list_lru_one(lru, gfp);
			if (!mlru)
				return -ENOMEM;
		}
		xas_set(&xas, pos->kmemcg_id);
		do {
			xas_lock_irqsave(&xas, flags);
			/* A dying pos will be (or was) reparented: skip it. */
			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
				xas_store(&xas, mlru);
				if (!xas_error(&xas))
					mlru = NULL;
			}
			xas_unlock_irqrestore(&xas, flags);
		} while (xas_nomem(&xas, gfp));
	} while (pos != memcg && !css_is_dying(&pos->css));

	/* Free the spare allocation if the last store didn't consume it. */
	if (unlikely(mlru))
		kfree(mlru);

	return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */

/*
 * Initialize @lru: allocate the per-node array, set up each node's list,
 * and register with the memcg machinery when @memcg_aware.  Returns 0 or
 * -ENOMEM.
 */
int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;

	if (mem_cgroup_kmem_disabled())
		memcg_aware = false;
#endif

	lru->node = kzalloc_objs(*lru->node, nr_node_ids);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i)
		init_one_lru(lru, &lru->node[i].lru);

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

/* Tear down @lru; safe to call on an uninitialized or destroyed lru. */
void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);