// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <dev@lankhorst.se>)
 * Copyright 2024 Red Hat (Maxime Ripard <mripard@kernel.org>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */

#include <linux/cgroup.h>
#include <linux/cgroup_dmem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/page_counter.h>
#include <linux/parser.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and the global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by the global spinlock only.
	 */
	struct list_head pools;

	/** @size: Size of the region, in bytes. */
	u64 size;

	/** @name: Name describing the region, set by dmem_cgroup_register_region(). */
	char *name;

	/**
	 * @unregistered: Whether the region has been unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};

struct dmemcg_state {
	struct cgroup_subsys_state css;

	struct list_head pools;
};

struct dmem_cgroup_pool_state {
	struct dmem_cgroup_region *region;
	struct dmemcg_state *cs;

	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	struct rcu_head rcu;

	struct page_counter cnt;

	bool inited;
};

/*
 * Three operations require locking protection:
 * - Registering and unregistering a region to/from the list, requires the global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, removing it when the CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since RCU provides enough protection for the most common operations,
 * more granular locking does not seem worthwhile. Most protection is
 * offered by RCU and the locklessly operating page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);
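
/*
 * For illustration, the lookup pattern this enables (compare
 * dmemcg_get_region_by_name() and get_cg_pool_unlocked() below); the
 * match() predicate is a placeholder, not a real helper:
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node)
 *		if (match(region) && kref_get_unless_zero(&region->ref))
 *			break;	/* region is now pinned beyond the read section */
 *	rcu_read_unlock();
 */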

static inline struct dmemcg_state *
css_to_dmemcs(struct cgroup_subsys_state *css)
{
	return container_of(css, struct dmemcg_state, css);
}

static inline struct dmemcg_state *get_current_dmemcs(void)
{
	return css_to_dmemcs(task_get_css(current, dmem_cgrp_id));
}

static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
{
	return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
}

static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
{
	list_del(&pool->region_node);
	kfree(pool);
}

static void
set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_min(&pool->cnt, val);
}

static void
set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_low(&pool->cnt, val);
}

static void
set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_max(&pool->cnt, val);
}

static u64 get_resource_low(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.low) : 0;
}

static u64 get_resource_min(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.min) : 0;
}

static u64 get_resource_max(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.max) : PAGE_COUNTER_MAX;
}

static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
{
	return pool ? page_counter_read(&pool->cnt) : 0;
}

static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
{
	set_resource_min(rpool, 0);
	set_resource_low(rpool, 0);
	set_resource_max(rpool, PAGE_COUNTER_MAX);
}

static void dmemcs_offline(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool;

	rcu_read_lock();
	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node)
		reset_all_resource_limits(pool);
	rcu_read_unlock();
}

static void dmemcs_free(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool, *next;

	spin_lock(&dmemcg_lock);
	list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) {
		/*
		 * The pool is dead and all references are 0,
		 * no need for RCU protection with list_del_rcu or freeing.
		 */
		list_del(&pool->css_node);
		free_cg_pool(pool);
	}
	spin_unlock(&dmemcg_lock);

	kfree(dmemcs);
}

static struct cgroup_subsys_state *
dmemcs_alloc(struct cgroup_subsys_state *parent_css)
{
	struct dmemcg_state *dmemcs = kzalloc(sizeof(*dmemcs), GFP_KERNEL);
	if (!dmemcs)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dmemcs->pools);
	return &dmemcs->css;
}

static struct dmem_cgroup_pool_state *
find_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool;

	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node, spin_is_locked(&dmemcg_lock))
		if (pool->region == region)
			return pool;

	return NULL;
}

static struct dmem_cgroup_pool_state *pool_parent(struct dmem_cgroup_pool_state *pool)
{
	if (!pool->cnt.parent)
		return NULL;

	return container_of(pool->cnt.parent, typeof(*pool), cnt);
}

static void
dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
				 struct dmem_cgroup_pool_state *test_pool)
{
	struct page_counter *climit;
	struct cgroup_subsys_state *css;
	struct dmemcg_state *dmemcg_iter;
	struct dmem_cgroup_pool_state *pool, *found_pool;

	climit = &limit_pool->cnt;

	rcu_read_lock();

	css_for_each_descendant_pre(css, &limit_pool->cs->css) {
		dmemcg_iter = container_of(css, struct dmemcg_state, css);
		found_pool = NULL;

		list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
			if (pool->region == limit_pool->region) {
				found_pool = pool;
				break;
			}
		}
		if (!found_pool)
			continue;

		page_counter_calculate_protection(
			climit, &found_pool->cnt, true);

		if (found_pool == test_pool)
			break;
	}
	rcu_read_unlock();
}

/**
 * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool
 * @limit_pool: The pool for which we hit limits
 * @test_pool: The pool for which to test
 * @ignore_low: Whether to ignore the low watermark
 * @ret_hit_low: Pointer to whether it makes sense to consider the low watermark
 *
 * This function returns true if we can evict from @test_pool, false if not.
 * When returning false and @ignore_low is false, @ret_hit_low may
 * be set to true to indicate this function can be retried with @ignore_low
 * set to true.
 *
 * Return: true if we can evict from @test_pool, false otherwise.
 */
bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
				      struct dmem_cgroup_pool_state *test_pool,
				      bool ignore_low, bool *ret_hit_low)
{
	struct dmem_cgroup_pool_state *pool = test_pool;
	struct page_counter *ctest;
	u64 used, min, low;

	/* Can always evict from the current pool, despite limits */
	if (limit_pool == test_pool)
		return true;

	if (limit_pool) {
		if (!parent_dmemcs(limit_pool->cs))
			return true;

		for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
			{}

		if (!pool)
			return false;
	} else {
		/*
		 * If there is no cgroup limiting memory usage, use the root
		 * cgroup instead for limit calculations.
		 */
		for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
			{}
	}

	ctest = &test_pool->cnt;

	dmem_cgroup_calculate_protection(limit_pool, test_pool);

	used = page_counter_read(ctest);
	min = READ_ONCE(ctest->emin);

	if (used <= min)
		return false;

	if (!ignore_low) {
		low = READ_ONCE(ctest->elow);
		if (used > low)
			return true;

		*ret_hit_low = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_state_evict_valuable);
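
/*
 * Illustrative sketch only (none of the mydrv_* or bo names exist in this
 * file): after dmem_cgroup_try_charge() fails with -EAGAIN and hands back
 * the limiting pool, a driver's eviction path might walk its LRU and ask
 * whether evicting each buffer is worthwhile, retrying once with
 * @ignore_low set if only the low watermarks stood in the way:
 *
 *	bool ignore_low = false, hit_low = false;
 * retry:
 *	list_for_each_entry(bo, &mydrv->lru, lru_node) {
 *		if (!dmem_cgroup_state_evict_valuable(limit_pool, bo->pool,
 *						      ignore_low, &hit_low))
 *			continue;
 *		mydrv_evict(bo);
 *	}
 *	if (!ignore_low && hit_low) {
 *		ignore_low = true;
 *		goto retry;
 *	}
 */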

static struct dmem_cgroup_pool_state *
alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		  struct dmem_cgroup_pool_state **allocpool)
{
	struct dmemcg_state *parent = parent_dmemcs(dmemcs);
	struct dmem_cgroup_pool_state *pool, *ppool = NULL;

	if (!*allocpool) {
		pool = kzalloc(sizeof(*pool), GFP_NOWAIT);
		if (!pool)
			return ERR_PTR(-ENOMEM);
	} else {
		pool = *allocpool;
		*allocpool = NULL;
	}

	pool->region = region;
	pool->cs = dmemcs;

	if (parent)
		ppool = find_cg_pool_locked(parent, region);

	page_counter_init(&pool->cnt,
			  ppool ? &ppool->cnt : NULL, true);
	reset_all_resource_limits(pool);

	list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
	list_add_tail(&pool->region_node, &region->pools);

	if (!parent)
		pool->inited = true;
	else
		pool->inited = ppool ? ppool->inited : false;
	return pool;
}

static struct dmem_cgroup_pool_state *
get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		   struct dmem_cgroup_pool_state **allocpool)
{
	struct dmem_cgroup_pool_state *pool, *ppool, *retpool;
	struct dmemcg_state *p, *pp;

	/*
	 * Recursively create the pools. They may not be initialized yet on
	 * recursion; that is done as a separate step below.
	 */
	for (p = dmemcs; p; p = parent_dmemcs(p)) {
		pool = find_cg_pool_locked(p, region);
		if (!pool)
			pool = alloc_pool_single(p, region, allocpool);

		if (IS_ERR(pool))
			return pool;

		if (p == dmemcs && pool->inited)
			return pool;

		if (pool->inited)
			break;
	}

	retpool = pool = find_cg_pool_locked(dmemcs, region);
	for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) {
		if (pool->inited)
			break;

		/* ppool was created by the loop above if it didn't exist yet. */
		ppool = find_cg_pool_locked(pp, region);

		/* Fix up parent links, mark as inited. */
		pool->cnt.parent = &ppool->cnt;
		pool->inited = true;

		pool = ppool;
	}

	return retpool;
}

static void dmemcg_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_region *region = container_of(rcu, typeof(*region), rcu);
	struct dmem_cgroup_pool_state *pool, *next;

	list_for_each_entry_safe(pool, next, &region->pools, region_node)
		free_cg_pool(pool);
	kfree(region->name);
	kfree(region);
}

static void dmemcg_free_region(struct kref *ref)
{
	struct dmem_cgroup_region *cgregion = container_of(ref, typeof(*cgregion), ref);

	call_rcu(&cgregion->rcu, dmemcg_free_rcu);
}

/**
 * dmem_cgroup_unregister_region() - Unregister a previously registered region.
 * @region: The region to unregister.
 *
 * This function undoes dmem_cgroup_register_region().
 */
void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{
	struct list_head *entry;

	if (!region)
		return;

	spin_lock(&dmemcg_lock);

	/* Remove from global region list */
	list_del_rcu(&region->region_node);

	list_for_each_rcu(entry, &region->pools) {
		struct dmem_cgroup_pool_state *pool =
			container_of(entry, typeof(*pool), region_node);

		list_del_rcu(&pool->css_node);
	}

	/*
	 * Ensure any RCU based lookups fail. Additionally,
	 * no new pools should be added to the dead region
	 * by get_cg_pool_unlocked.
	 */
	region->unregistered = true;
	spin_unlock(&dmemcg_lock);

	kref_put(&region->ref, dmemcg_free_region);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_unregister_region);

/**
 * dmem_cgroup_register_region() - Register a region for the dmem cgroup.
 * @size: Size of the region to register, in bytes.
 * @fmt: Printf-style format for the region name.
 *
 * This function registers a region in the dmem cgroup with the
 * name given. After calling this function, the region can be
 * used for allocations.
 *
 * Return: NULL if @size is zero, a pointer to the new region on success,
 * or an ERR_PTR() on failure.
 */
struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *fmt, ...)
{
	struct dmem_cgroup_region *ret;
	char *region_name;
	va_list ap;

	if (!size)
		return NULL;

	va_start(ap, fmt);
	region_name = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);
	if (!region_name)
		return ERR_PTR(-ENOMEM);

	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret) {
		kfree(region_name);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&ret->pools);
	ret->name = region_name;
	ret->size = size;
	kref_init(&ret->ref);

	spin_lock(&dmemcg_lock);
	list_add_tail_rcu(&ret->region_node, &dmem_cgroup_regions);
	spin_unlock(&dmemcg_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_register_region);
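
/*
 * Illustrative usage sketch; "mydrv" and the surrounding driver code are
 * hypothetical. A driver would typically register one region per memory
 * resource at probe time and unregister it again on removal:
 *
 *	mydrv->region = dmem_cgroup_register_region(vram_size,
 *						    "mydrv/%s/vram0", dev_name(dev));
 *	if (IS_ERR(mydrv->region))
 *		return PTR_ERR(mydrv->region);
 *	// NULL is returned (and no region exists) when vram_size is 0
 *	...
 *	dmem_cgroup_unregister_region(mydrv->region);
 */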

static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
{
	struct dmem_cgroup_region *region;

	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node, spin_is_locked(&dmemcg_lock))
		if (!strcmp(name, region->name) &&
		    kref_get_unless_zero(&region->ref))
			return region;

	return NULL;
}

/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge().
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (pool)
		css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);

static struct dmem_cgroup_pool_state *
get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *allocpool = NULL;

	/* fastpath lookup? */
	rcu_read_lock();
	pool = find_cg_pool_locked(cg, region);
	if (pool && !READ_ONCE(pool->inited))
		pool = NULL;
	rcu_read_unlock();

	while (!pool) {
		spin_lock(&dmemcg_lock);
		if (!region->unregistered)
			pool = get_cg_pool_locked(cg, region, &allocpool);
		else
			pool = ERR_PTR(-ENODEV);
		spin_unlock(&dmemcg_lock);

		if (pool == ERR_PTR(-ENOMEM)) {
			pool = NULL;
			if (WARN_ON(allocpool))
				continue;

			allocpool = kzalloc(sizeof(*allocpool), GFP_KERNEL);
			if (allocpool) {
				pool = NULL;
				continue;
			}
		}
	}

	kfree(allocpool);
	return pool;
}

/**
 * dmem_cgroup_uncharge() - Uncharge a pool.
 * @pool: Pool to uncharge.
 * @size: Size to uncharge.
 *
 * Undoes the effects of dmem_cgroup_try_charge().
 * Must be called with the pool returned by dmem_cgroup_try_charge()
 * and the same @size.
 */
void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{
	if (!pool)
		return;

	page_counter_uncharge(&pool->cnt, size);
	css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);

/**
 * dmem_cgroup_try_charge() - Try charging a new allocation to a region.
 * @region: dmem region to charge
 * @size: Size (in bytes) to charge.
 * @ret_pool: On successful allocation, the pool that is charged.
 * @ret_limit_pool: On a failed allocation, the limiting pool.
 *
 * This function charges the @region region for a size of @size bytes.
 *
 * If the function succeeds, @ret_pool is set, which must be passed to
 * dmem_cgroup_uncharge() when undoing the allocation.
 *
 * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it
 * will be set to the pool for which the limit is hit. This can be used for
 * eviction as argument to dmem_cgroup_state_evict_valuable(). This reference
 * must be freed with dmem_cgroup_pool_state_put().
 *
 * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
 */
int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
			   struct dmem_cgroup_pool_state **ret_pool,
			   struct dmem_cgroup_pool_state **ret_limit_pool)
{
	struct dmemcg_state *cg;
	struct dmem_cgroup_pool_state *pool;
	struct page_counter *fail;
	int ret;

	*ret_pool = NULL;
	if (ret_limit_pool)
		*ret_limit_pool = NULL;

	/*
	 * Hold on to the css, as the cgroup can be removed but resource
	 * accounting happens on the css.
	 */
	cg = get_current_dmemcs();

	pool = get_cg_pool_unlocked(cg, region);
	if (IS_ERR(pool)) {
		ret = PTR_ERR(pool);
		goto err;
	}

	if (!page_counter_try_charge(&pool->cnt, size, &fail)) {
		if (ret_limit_pool) {
			*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
			css_get(&(*ret_limit_pool)->cs->css);
		}
		ret = -EAGAIN;
		goto err;
	}

	/* On success, the reference from get_current_dmemcs is transferred to *ret_pool */
	*ret_pool = pool;
	return 0;

err:
	css_put(&cg->css);
	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);
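
/*
 * Illustrative charge/uncharge sketch; the mydrv_* and bo names are
 * hypothetical. A successful charge is balanced by dmem_cgroup_uncharge()
 * with the returned pool and the same size, while a failed charge may hand
 * back the limiting pool for eviction decisions:
 *
 *	struct dmem_cgroup_pool_state *pool, *limit_pool = NULL;
 *	int ret;
 *
 *	ret = dmem_cgroup_try_charge(mydrv->region, size, &pool, &limit_pool);
 *	if (ret == -EAGAIN) {
 *		mydrv_evict_for(limit_pool);		// see eviction sketch above
 *		dmem_cgroup_pool_state_put(limit_pool);
 *	} else if (!ret) {
 *		bo->pool = pool;
 *		...
 *		dmem_cgroup_uncharge(bo->pool, size);	// when the buffer is freed
 *	}
 */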

static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
{
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		seq_puts(sf, region->name);
		seq_printf(sf, " %llu\n", region->size);
	}
	rcu_read_unlock();
	return 0;
}

static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region,
			      u64 *new_limit)
{
	char *end;

	if (!strcmp(options, "max")) {
		*new_limit = PAGE_COUNTER_MAX;
		return 0;
	}

	*new_limit = memparse(options, &end);
	if (*end != '\0')
		return -EINVAL;

	return 0;
}
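
/*
 * For illustration, the limit strings this accepts: "max" resets the limit
 * to PAGE_COUNTER_MAX, anything else goes through memparse(), so both plain
 * byte counts and K/M/G/... suffixes work:
 *
 *	"max"         -> PAGE_COUNTER_MAX
 *	"1073741824"  -> 1073741824 bytes
 *	"1G"          -> 1073741824 bytes
 */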

static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off,
				  void (*apply)(struct dmem_cgroup_pool_state *, u64))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(of_css(of));
	int err = 0;

	while (buf && !err) {
		struct dmem_cgroup_pool_state *pool = NULL;
		char *options, *region_name;
		struct dmem_cgroup_region *region;
		u64 new_limit;

		options = buf;
		buf = strchr(buf, '\n');
		if (buf)
			*buf++ = '\0';

		options = strstrip(options);

		/* eat empty lines */
		if (!options[0])
			continue;

		region_name = strsep(&options, " \t");
		if (!region_name[0])
			continue;

		rcu_read_lock();
		region = dmemcg_get_region_by_name(region_name);
		rcu_read_unlock();

		if (!region)
			return -EINVAL;

		err = dmemcg_parse_limit(options, region, &new_limit);
		if (err < 0)
			goto out_put;

		pool = get_cg_pool_unlocked(dmemcs, region);
		if (IS_ERR(pool)) {
			err = PTR_ERR(pool);
			goto out_put;
		}

		/* And commit */
		apply(pool, new_limit);

out_put:
		kref_put(&region->ref, dmemcg_free_region);
	}

	return err ?: nbytes;
}
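
/*
 * The written buffer is parsed line by line as "<region name> <limit>";
 * a write to dmem.max could therefore contain, for a hypothetical region:
 *
 *	drm/0000:03:00.0/vram0 1G
 *	drm/0000:03:00.0/vram0 max
 */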

static int dmemcg_limit_show(struct seq_file *sf, void *v,
			     u64 (*fn)(struct dmem_cgroup_pool_state *))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(seq_css(sf));
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		struct dmem_cgroup_pool_state *pool = find_cg_pool_locked(dmemcs, region);
		u64 val;

		seq_puts(sf, region->name);

		val = fn(pool);
		if (val < PAGE_COUNTER_MAX)
			seq_printf(sf, " %lld\n", val);
		else
			seq_puts(sf, " max\n");
	}
	rcu_read_unlock();

	return 0;
}

static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_current);
}

static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_min);
}

static ssize_t dmem_cgroup_region_min_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_min);
}

static int dmem_cgroup_region_low_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_low);
}

static ssize_t dmem_cgroup_region_low_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_low);
}

static int dmem_cgroup_region_max_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_max);
}

static ssize_t dmem_cgroup_region_max_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_max);
}

static struct cftype files[] = {
	{
		.name = "capacity",
		.seq_show = dmem_cgroup_region_capacity_show,
		.flags = CFTYPE_ONLY_ON_ROOT,
	},
	{
		.name = "current",
		.seq_show = dmem_cgroup_region_current_show,
	},
	{
		.name = "min",
		.write = dmem_cgroup_region_min_write,
		.seq_show = dmem_cgroup_region_min_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "low",
		.write = dmem_cgroup_region_low_write,
		.seq_show = dmem_cgroup_region_low_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "max",
		.write = dmem_cgroup_region_max_write,
		.seq_show = dmem_cgroup_region_max_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* Zero entry terminates. */
};
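
/*
 * For reference, these show up as dmem.capacity (root only) and
 * dmem.current, dmem.min, dmem.low and dmem.max (non-root only),
 * nested-keyed by region name; a hypothetical dmem.capacity line:
 *
 *	drm/0000:03:00.0/vram0 8573157376
 */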

struct cgroup_subsys dmem_cgrp_subsys = {
	.css_alloc = dmemcs_alloc,
	.css_free = dmemcs_free,
	.css_offline = dmemcs_offline,
	.legacy_cftypes = files,
	.dfl_cftypes = files,
};