xref: /linux/mm/hugetlb_cgroup.c (revision da1def55919f4852c4759249a78d63a0c5d2d8f9)
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */
152bc64a20SAneesh Kumar K.V 
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/lockdep.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
202bc64a20SAneesh Kumar K.V 
212bc64a20SAneesh Kumar K.V struct hugetlb_cgroup {
222bc64a20SAneesh Kumar K.V 	struct cgroup_subsys_state css;
232bc64a20SAneesh Kumar K.V 	/*
242bc64a20SAneesh Kumar K.V 	 * the counter to account for hugepages from hugetlb.
252bc64a20SAneesh Kumar K.V 	 */
262bc64a20SAneesh Kumar K.V 	struct res_counter hugepage[HUGE_MAX_HSTATE];
272bc64a20SAneesh Kumar K.V };
282bc64a20SAneesh Kumar K.V 
292bc64a20SAneesh Kumar K.V struct cgroup_subsys hugetlb_subsys __read_mostly;
302bc64a20SAneesh Kumar K.V static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
312bc64a20SAneesh Kumar K.V 
322bc64a20SAneesh Kumar K.V static inline
332bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
342bc64a20SAneesh Kumar K.V {
352bc64a20SAneesh Kumar K.V 	return container_of(s, struct hugetlb_cgroup, css);
362bc64a20SAneesh Kumar K.V }
372bc64a20SAneesh Kumar K.V 
382bc64a20SAneesh Kumar K.V static inline
392bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
402bc64a20SAneesh Kumar K.V {
412bc64a20SAneesh Kumar K.V 	return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup,
422bc64a20SAneesh Kumar K.V 							   hugetlb_subsys_id));
432bc64a20SAneesh Kumar K.V }
442bc64a20SAneesh Kumar K.V 
452bc64a20SAneesh Kumar K.V static inline
462bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
472bc64a20SAneesh Kumar K.V {
482bc64a20SAneesh Kumar K.V 	return hugetlb_cgroup_from_css(task_subsys_state(task,
492bc64a20SAneesh Kumar K.V 							 hugetlb_subsys_id));
502bc64a20SAneesh Kumar K.V }
512bc64a20SAneesh Kumar K.V 
522bc64a20SAneesh Kumar K.V static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
532bc64a20SAneesh Kumar K.V {
542bc64a20SAneesh Kumar K.V 	return (h_cg == root_h_cgroup);
552bc64a20SAneesh Kumar K.V }
562bc64a20SAneesh Kumar K.V 
572bc64a20SAneesh Kumar K.V static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
582bc64a20SAneesh Kumar K.V {
592bc64a20SAneesh Kumar K.V 	if (!cg->parent)
602bc64a20SAneesh Kumar K.V 		return NULL;
612bc64a20SAneesh Kumar K.V 	return hugetlb_cgroup_from_cgroup(cg->parent);
622bc64a20SAneesh Kumar K.V }
632bc64a20SAneesh Kumar K.V 
642bc64a20SAneesh Kumar K.V static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
652bc64a20SAneesh Kumar K.V {
662bc64a20SAneesh Kumar K.V 	int idx;
672bc64a20SAneesh Kumar K.V 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
682bc64a20SAneesh Kumar K.V 
692bc64a20SAneesh Kumar K.V 	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
702bc64a20SAneesh Kumar K.V 		if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
712bc64a20SAneesh Kumar K.V 			return true;
722bc64a20SAneesh Kumar K.V 	}
732bc64a20SAneesh Kumar K.V 	return false;
742bc64a20SAneesh Kumar K.V }
752bc64a20SAneesh Kumar K.V 
762bc64a20SAneesh Kumar K.V static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
772bc64a20SAneesh Kumar K.V {
782bc64a20SAneesh Kumar K.V 	int idx;
792bc64a20SAneesh Kumar K.V 	struct cgroup *parent_cgroup;
802bc64a20SAneesh Kumar K.V 	struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup;
812bc64a20SAneesh Kumar K.V 
822bc64a20SAneesh Kumar K.V 	h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
832bc64a20SAneesh Kumar K.V 	if (!h_cgroup)
842bc64a20SAneesh Kumar K.V 		return ERR_PTR(-ENOMEM);
852bc64a20SAneesh Kumar K.V 
862bc64a20SAneesh Kumar K.V 	parent_cgroup = cgroup->parent;
872bc64a20SAneesh Kumar K.V 	if (parent_cgroup) {
882bc64a20SAneesh Kumar K.V 		parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup);
892bc64a20SAneesh Kumar K.V 		for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
902bc64a20SAneesh Kumar K.V 			res_counter_init(&h_cgroup->hugepage[idx],
912bc64a20SAneesh Kumar K.V 					 &parent_h_cgroup->hugepage[idx]);
922bc64a20SAneesh Kumar K.V 	} else {
932bc64a20SAneesh Kumar K.V 		root_h_cgroup = h_cgroup;
942bc64a20SAneesh Kumar K.V 		for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
952bc64a20SAneesh Kumar K.V 			res_counter_init(&h_cgroup->hugepage[idx], NULL);
962bc64a20SAneesh Kumar K.V 	}
972bc64a20SAneesh Kumar K.V 	return &h_cgroup->css;
982bc64a20SAneesh Kumar K.V }
992bc64a20SAneesh Kumar K.V 
/* Free the hugetlb state that hugetlb_cgroup_create() allocated for @cgroup. */
static void hugetlb_cgroup_destroy(struct cgroup *cgroup)
{
	kfree(hugetlb_cgroup_from_cgroup(cgroup));
}
1072bc64a20SAneesh Kumar K.V 
108*da1def55SAneesh Kumar K.V 
109*da1def55SAneesh Kumar K.V /*
110*da1def55SAneesh Kumar K.V  * Should be called with hugetlb_lock held.
111*da1def55SAneesh Kumar K.V  * Since we are holding hugetlb_lock, pages cannot get moved from
112*da1def55SAneesh Kumar K.V  * active list or uncharged from the cgroup, So no need to get
113*da1def55SAneesh Kumar K.V  * page reference and test for page active here. This function
114*da1def55SAneesh Kumar K.V  * cannot fail.
115*da1def55SAneesh Kumar K.V  */
116*da1def55SAneesh Kumar K.V static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup,
117*da1def55SAneesh Kumar K.V 				       struct page *page)
118*da1def55SAneesh Kumar K.V {
119*da1def55SAneesh Kumar K.V 	int csize;
120*da1def55SAneesh Kumar K.V 	struct res_counter *counter;
121*da1def55SAneesh Kumar K.V 	struct res_counter *fail_res;
122*da1def55SAneesh Kumar K.V 	struct hugetlb_cgroup *page_hcg;
123*da1def55SAneesh Kumar K.V 	struct hugetlb_cgroup *h_cg   = hugetlb_cgroup_from_cgroup(cgroup);
124*da1def55SAneesh Kumar K.V 	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup);
125*da1def55SAneesh Kumar K.V 
126*da1def55SAneesh Kumar K.V 	page_hcg = hugetlb_cgroup_from_page(page);
127*da1def55SAneesh Kumar K.V 	/*
128*da1def55SAneesh Kumar K.V 	 * We can have pages in active list without any cgroup
129*da1def55SAneesh Kumar K.V 	 * ie, hugepage with less than 3 pages. We can safely
130*da1def55SAneesh Kumar K.V 	 * ignore those pages.
131*da1def55SAneesh Kumar K.V 	 */
132*da1def55SAneesh Kumar K.V 	if (!page_hcg || page_hcg != h_cg)
133*da1def55SAneesh Kumar K.V 		goto out;
134*da1def55SAneesh Kumar K.V 
135*da1def55SAneesh Kumar K.V 	csize = PAGE_SIZE << compound_order(page);
136*da1def55SAneesh Kumar K.V 	if (!parent) {
137*da1def55SAneesh Kumar K.V 		parent = root_h_cgroup;
138*da1def55SAneesh Kumar K.V 		/* root has no limit */
139*da1def55SAneesh Kumar K.V 		res_counter_charge_nofail(&parent->hugepage[idx],
140*da1def55SAneesh Kumar K.V 					  csize, &fail_res);
141*da1def55SAneesh Kumar K.V 	}
142*da1def55SAneesh Kumar K.V 	counter = &h_cg->hugepage[idx];
143*da1def55SAneesh Kumar K.V 	res_counter_uncharge_until(counter, counter->parent, csize);
144*da1def55SAneesh Kumar K.V 
145*da1def55SAneesh Kumar K.V 	set_hugetlb_cgroup(page, parent);
146*da1def55SAneesh Kumar K.V out:
147*da1def55SAneesh Kumar K.V 	return;
148*da1def55SAneesh Kumar K.V }
149*da1def55SAneesh Kumar K.V 
150*da1def55SAneesh Kumar K.V /*
151*da1def55SAneesh Kumar K.V  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
152*da1def55SAneesh Kumar K.V  * the parent cgroup.
153*da1def55SAneesh Kumar K.V  */
1542bc64a20SAneesh Kumar K.V static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
1552bc64a20SAneesh Kumar K.V {
156*da1def55SAneesh Kumar K.V 	struct hstate *h;
157*da1def55SAneesh Kumar K.V 	struct page *page;
158*da1def55SAneesh Kumar K.V 	int ret = 0, idx = 0;
159*da1def55SAneesh Kumar K.V 
160*da1def55SAneesh Kumar K.V 	do {
161*da1def55SAneesh Kumar K.V 		if (cgroup_task_count(cgroup) ||
162*da1def55SAneesh Kumar K.V 		    !list_empty(&cgroup->children)) {
163*da1def55SAneesh Kumar K.V 			ret = -EBUSY;
164*da1def55SAneesh Kumar K.V 			goto out;
165*da1def55SAneesh Kumar K.V 		}
166*da1def55SAneesh Kumar K.V 		for_each_hstate(h) {
167*da1def55SAneesh Kumar K.V 			spin_lock(&hugetlb_lock);
168*da1def55SAneesh Kumar K.V 			list_for_each_entry(page, &h->hugepage_activelist, lru)
169*da1def55SAneesh Kumar K.V 				hugetlb_cgroup_move_parent(idx, cgroup, page);
170*da1def55SAneesh Kumar K.V 
171*da1def55SAneesh Kumar K.V 			spin_unlock(&hugetlb_lock);
172*da1def55SAneesh Kumar K.V 			idx++;
173*da1def55SAneesh Kumar K.V 		}
174*da1def55SAneesh Kumar K.V 		cond_resched();
175*da1def55SAneesh Kumar K.V 	} while (hugetlb_cgroup_have_usage(cgroup));
176*da1def55SAneesh Kumar K.V out:
177*da1def55SAneesh Kumar K.V 	return ret;
1782bc64a20SAneesh Kumar K.V }
1792bc64a20SAneesh Kumar K.V 
1806d76dcf4SAneesh Kumar K.V int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
1816d76dcf4SAneesh Kumar K.V 				 struct hugetlb_cgroup **ptr)
1826d76dcf4SAneesh Kumar K.V {
1836d76dcf4SAneesh Kumar K.V 	int ret = 0;
1846d76dcf4SAneesh Kumar K.V 	struct res_counter *fail_res;
1856d76dcf4SAneesh Kumar K.V 	struct hugetlb_cgroup *h_cg = NULL;
1866d76dcf4SAneesh Kumar K.V 	unsigned long csize = nr_pages * PAGE_SIZE;
1876d76dcf4SAneesh Kumar K.V 
1886d76dcf4SAneesh Kumar K.V 	if (hugetlb_cgroup_disabled())
1896d76dcf4SAneesh Kumar K.V 		goto done;
1906d76dcf4SAneesh Kumar K.V 	/*
1916d76dcf4SAneesh Kumar K.V 	 * We don't charge any cgroup if the compound page have less
1926d76dcf4SAneesh Kumar K.V 	 * than 3 pages.
1936d76dcf4SAneesh Kumar K.V 	 */
1946d76dcf4SAneesh Kumar K.V 	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
1956d76dcf4SAneesh Kumar K.V 		goto done;
1966d76dcf4SAneesh Kumar K.V again:
1976d76dcf4SAneesh Kumar K.V 	rcu_read_lock();
1986d76dcf4SAneesh Kumar K.V 	h_cg = hugetlb_cgroup_from_task(current);
1996d76dcf4SAneesh Kumar K.V 	if (!css_tryget(&h_cg->css)) {
2006d76dcf4SAneesh Kumar K.V 		rcu_read_unlock();
2016d76dcf4SAneesh Kumar K.V 		goto again;
2026d76dcf4SAneesh Kumar K.V 	}
2036d76dcf4SAneesh Kumar K.V 	rcu_read_unlock();
2046d76dcf4SAneesh Kumar K.V 
2056d76dcf4SAneesh Kumar K.V 	ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
2066d76dcf4SAneesh Kumar K.V 	css_put(&h_cg->css);
2076d76dcf4SAneesh Kumar K.V done:
2086d76dcf4SAneesh Kumar K.V 	*ptr = h_cg;
2096d76dcf4SAneesh Kumar K.V 	return ret;
2106d76dcf4SAneesh Kumar K.V }
2116d76dcf4SAneesh Kumar K.V 
2126d76dcf4SAneesh Kumar K.V void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
2136d76dcf4SAneesh Kumar K.V 				  struct hugetlb_cgroup *h_cg,
2146d76dcf4SAneesh Kumar K.V 				  struct page *page)
2156d76dcf4SAneesh Kumar K.V {
2166d76dcf4SAneesh Kumar K.V 	if (hugetlb_cgroup_disabled() || !h_cg)
2176d76dcf4SAneesh Kumar K.V 		return;
2186d76dcf4SAneesh Kumar K.V 
2196d76dcf4SAneesh Kumar K.V 	spin_lock(&hugetlb_lock);
2206d76dcf4SAneesh Kumar K.V 	set_hugetlb_cgroup(page, h_cg);
2216d76dcf4SAneesh Kumar K.V 	spin_unlock(&hugetlb_lock);
2226d76dcf4SAneesh Kumar K.V 	return;
2236d76dcf4SAneesh Kumar K.V }
2246d76dcf4SAneesh Kumar K.V 
2256d76dcf4SAneesh Kumar K.V /*
2266d76dcf4SAneesh Kumar K.V  * Should be called with hugetlb_lock held
2276d76dcf4SAneesh Kumar K.V  */
2286d76dcf4SAneesh Kumar K.V void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
2296d76dcf4SAneesh Kumar K.V 				  struct page *page)
2306d76dcf4SAneesh Kumar K.V {
2316d76dcf4SAneesh Kumar K.V 	struct hugetlb_cgroup *h_cg;
2326d76dcf4SAneesh Kumar K.V 	unsigned long csize = nr_pages * PAGE_SIZE;
2336d76dcf4SAneesh Kumar K.V 
2346d76dcf4SAneesh Kumar K.V 	if (hugetlb_cgroup_disabled())
2356d76dcf4SAneesh Kumar K.V 		return;
2366d76dcf4SAneesh Kumar K.V 	VM_BUG_ON(!spin_is_locked(&hugetlb_lock));
2376d76dcf4SAneesh Kumar K.V 	h_cg = hugetlb_cgroup_from_page(page);
2386d76dcf4SAneesh Kumar K.V 	if (unlikely(!h_cg))
2396d76dcf4SAneesh Kumar K.V 		return;
2406d76dcf4SAneesh Kumar K.V 	set_hugetlb_cgroup(page, NULL);
2416d76dcf4SAneesh Kumar K.V 	res_counter_uncharge(&h_cg->hugepage[idx], csize);
2426d76dcf4SAneesh Kumar K.V 	return;
2436d76dcf4SAneesh Kumar K.V }
2446d76dcf4SAneesh Kumar K.V 
2456d76dcf4SAneesh Kumar K.V void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
2466d76dcf4SAneesh Kumar K.V 				    struct hugetlb_cgroup *h_cg)
2476d76dcf4SAneesh Kumar K.V {
2486d76dcf4SAneesh Kumar K.V 	unsigned long csize = nr_pages * PAGE_SIZE;
2496d76dcf4SAneesh Kumar K.V 
2506d76dcf4SAneesh Kumar K.V 	if (hugetlb_cgroup_disabled() || !h_cg)
2516d76dcf4SAneesh Kumar K.V 		return;
2526d76dcf4SAneesh Kumar K.V 
2536d76dcf4SAneesh Kumar K.V 	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
2546d76dcf4SAneesh Kumar K.V 		return;
2556d76dcf4SAneesh Kumar K.V 
2566d76dcf4SAneesh Kumar K.V 	res_counter_uncharge(&h_cg->hugepage[idx], csize);
2576d76dcf4SAneesh Kumar K.V 	return;
2586d76dcf4SAneesh Kumar K.V }
2596d76dcf4SAneesh Kumar K.V 
2602bc64a20SAneesh Kumar K.V struct cgroup_subsys hugetlb_subsys = {
2612bc64a20SAneesh Kumar K.V 	.name = "hugetlb",
2622bc64a20SAneesh Kumar K.V 	.create     = hugetlb_cgroup_create,
2632bc64a20SAneesh Kumar K.V 	.pre_destroy = hugetlb_cgroup_pre_destroy,
2642bc64a20SAneesh Kumar K.V 	.destroy    = hugetlb_cgroup_destroy,
2652bc64a20SAneesh Kumar K.V 	.subsys_id  = hugetlb_subsys_id,
2662bc64a20SAneesh Kumar K.V };
267