12bc64a20SAneesh Kumar K.V /* 22bc64a20SAneesh Kumar K.V * 32bc64a20SAneesh Kumar K.V * Copyright IBM Corporation, 2012 42bc64a20SAneesh Kumar K.V * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 52bc64a20SAneesh Kumar K.V * 62bc64a20SAneesh Kumar K.V * This program is free software; you can redistribute it and/or modify it 72bc64a20SAneesh Kumar K.V * under the terms of version 2.1 of the GNU Lesser General Public License 82bc64a20SAneesh Kumar K.V * as published by the Free Software Foundation. 92bc64a20SAneesh Kumar K.V * 102bc64a20SAneesh Kumar K.V * This program is distributed in the hope that it would be useful, but 112bc64a20SAneesh Kumar K.V * WITHOUT ANY WARRANTY; without even the implied warranty of 122bc64a20SAneesh Kumar K.V * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 132bc64a20SAneesh Kumar K.V * 142bc64a20SAneesh Kumar K.V */ 152bc64a20SAneesh Kumar K.V 162bc64a20SAneesh Kumar K.V #include <linux/cgroup.h> 172bc64a20SAneesh Kumar K.V #include <linux/slab.h> 182bc64a20SAneesh Kumar K.V #include <linux/hugetlb.h> 192bc64a20SAneesh Kumar K.V #include <linux/hugetlb_cgroup.h> 202bc64a20SAneesh Kumar K.V 212bc64a20SAneesh Kumar K.V struct hugetlb_cgroup { 222bc64a20SAneesh Kumar K.V struct cgroup_subsys_state css; 232bc64a20SAneesh Kumar K.V /* 242bc64a20SAneesh Kumar K.V * the counter to account for hugepages from hugetlb. 252bc64a20SAneesh Kumar K.V */ 262bc64a20SAneesh Kumar K.V struct res_counter hugepage[HUGE_MAX_HSTATE]; 272bc64a20SAneesh Kumar K.V }; 282bc64a20SAneesh Kumar K.V 292bc64a20SAneesh Kumar K.V struct cgroup_subsys hugetlb_subsys __read_mostly; 302bc64a20SAneesh Kumar K.V static struct hugetlb_cgroup *root_h_cgroup __read_mostly; 312bc64a20SAneesh Kumar K.V 322bc64a20SAneesh Kumar K.V static inline 332bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) 342bc64a20SAneesh Kumar K.V { 352bc64a20SAneesh Kumar K.V return container_of(s, struct hugetlb_cgroup, css); 362bc64a20SAneesh Kumar K.V } 372bc64a20SAneesh Kumar K.V 382bc64a20SAneesh Kumar K.V static inline 392bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup) 402bc64a20SAneesh Kumar K.V { 412bc64a20SAneesh Kumar K.V return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup, 422bc64a20SAneesh Kumar K.V hugetlb_subsys_id)); 432bc64a20SAneesh Kumar K.V } 442bc64a20SAneesh Kumar K.V 452bc64a20SAneesh Kumar K.V static inline 462bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) 472bc64a20SAneesh Kumar K.V { 482bc64a20SAneesh Kumar K.V return hugetlb_cgroup_from_css(task_subsys_state(task, 492bc64a20SAneesh Kumar K.V hugetlb_subsys_id)); 502bc64a20SAneesh Kumar K.V } 512bc64a20SAneesh Kumar K.V 522bc64a20SAneesh Kumar K.V static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) 532bc64a20SAneesh Kumar K.V { 542bc64a20SAneesh Kumar K.V return (h_cg == root_h_cgroup); 552bc64a20SAneesh Kumar K.V } 562bc64a20SAneesh Kumar K.V 572bc64a20SAneesh Kumar K.V static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg) 582bc64a20SAneesh Kumar K.V { 592bc64a20SAneesh Kumar K.V if (!cg->parent) 602bc64a20SAneesh Kumar K.V return NULL; 612bc64a20SAneesh Kumar K.V return hugetlb_cgroup_from_cgroup(cg->parent); 622bc64a20SAneesh Kumar K.V } 632bc64a20SAneesh Kumar K.V 642bc64a20SAneesh Kumar K.V static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg) 652bc64a20SAneesh Kumar K.V { 662bc64a20SAneesh Kumar K.V int idx; 672bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg); 682bc64a20SAneesh Kumar K.V 692bc64a20SAneesh Kumar K.V for (idx = 0; idx < hugetlb_max_hstate; idx++) { 702bc64a20SAneesh Kumar K.V if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) 712bc64a20SAneesh Kumar K.V return true; 722bc64a20SAneesh Kumar K.V } 732bc64a20SAneesh Kumar K.V return false; 742bc64a20SAneesh Kumar K.V } 752bc64a20SAneesh Kumar K.V 762bc64a20SAneesh Kumar K.V static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup) 772bc64a20SAneesh Kumar K.V { 782bc64a20SAneesh Kumar K.V int idx; 792bc64a20SAneesh Kumar K.V struct cgroup *parent_cgroup; 802bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup; 812bc64a20SAneesh Kumar K.V 822bc64a20SAneesh Kumar K.V h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); 832bc64a20SAneesh Kumar K.V if (!h_cgroup) 842bc64a20SAneesh Kumar K.V return ERR_PTR(-ENOMEM); 852bc64a20SAneesh Kumar K.V 862bc64a20SAneesh Kumar K.V parent_cgroup = cgroup->parent; 872bc64a20SAneesh Kumar K.V if (parent_cgroup) { 882bc64a20SAneesh Kumar K.V parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup); 892bc64a20SAneesh Kumar K.V for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) 902bc64a20SAneesh Kumar K.V res_counter_init(&h_cgroup->hugepage[idx], 912bc64a20SAneesh Kumar K.V &parent_h_cgroup->hugepage[idx]); 922bc64a20SAneesh Kumar K.V } else { 932bc64a20SAneesh Kumar K.V root_h_cgroup = h_cgroup; 942bc64a20SAneesh Kumar K.V for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) 952bc64a20SAneesh Kumar K.V res_counter_init(&h_cgroup->hugepage[idx], NULL); 962bc64a20SAneesh Kumar K.V } 972bc64a20SAneesh Kumar K.V return &h_cgroup->css; 982bc64a20SAneesh Kumar K.V } 992bc64a20SAneesh Kumar K.V 1002bc64a20SAneesh Kumar K.V static void hugetlb_cgroup_destroy(struct cgroup *cgroup) 1012bc64a20SAneesh Kumar K.V { 1022bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *h_cgroup; 1032bc64a20SAneesh Kumar K.V 1042bc64a20SAneesh Kumar K.V h_cgroup = hugetlb_cgroup_from_cgroup(cgroup); 1052bc64a20SAneesh Kumar K.V kfree(h_cgroup); 1062bc64a20SAneesh Kumar K.V } 1072bc64a20SAneesh Kumar K.V 108*da1def55SAneesh Kumar K.V 109*da1def55SAneesh Kumar K.V /* 110*da1def55SAneesh Kumar K.V * Should be called with hugetlb_lock held. 111*da1def55SAneesh Kumar K.V * Since we are holding hugetlb_lock, pages cannot get moved from 112*da1def55SAneesh Kumar K.V * active list or uncharged from the cgroup, So no need to get 113*da1def55SAneesh Kumar K.V * page reference and test for page active here. This function 114*da1def55SAneesh Kumar K.V * cannot fail. 115*da1def55SAneesh Kumar K.V */ 116*da1def55SAneesh Kumar K.V static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup, 117*da1def55SAneesh Kumar K.V struct page *page) 118*da1def55SAneesh Kumar K.V { 119*da1def55SAneesh Kumar K.V int csize; 120*da1def55SAneesh Kumar K.V struct res_counter *counter; 121*da1def55SAneesh Kumar K.V struct res_counter *fail_res; 122*da1def55SAneesh Kumar K.V struct hugetlb_cgroup *page_hcg; 123*da1def55SAneesh Kumar K.V struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup); 124*da1def55SAneesh Kumar K.V struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup); 125*da1def55SAneesh Kumar K.V 126*da1def55SAneesh Kumar K.V page_hcg = hugetlb_cgroup_from_page(page); 127*da1def55SAneesh Kumar K.V /* 128*da1def55SAneesh Kumar K.V * We can have pages in active list without any cgroup 129*da1def55SAneesh Kumar K.V * ie, hugepage with less than 3 pages. We can safely 130*da1def55SAneesh Kumar K.V * ignore those pages. 131*da1def55SAneesh Kumar K.V */ 132*da1def55SAneesh Kumar K.V if (!page_hcg || page_hcg != h_cg) 133*da1def55SAneesh Kumar K.V goto out; 134*da1def55SAneesh Kumar K.V 135*da1def55SAneesh Kumar K.V csize = PAGE_SIZE << compound_order(page); 136*da1def55SAneesh Kumar K.V if (!parent) { 137*da1def55SAneesh Kumar K.V parent = root_h_cgroup; 138*da1def55SAneesh Kumar K.V /* root has no limit */ 139*da1def55SAneesh Kumar K.V res_counter_charge_nofail(&parent->hugepage[idx], 140*da1def55SAneesh Kumar K.V csize, &fail_res); 141*da1def55SAneesh Kumar K.V } 142*da1def55SAneesh Kumar K.V counter = &h_cg->hugepage[idx]; 143*da1def55SAneesh Kumar K.V res_counter_uncharge_until(counter, counter->parent, csize); 144*da1def55SAneesh Kumar K.V 145*da1def55SAneesh Kumar K.V set_hugetlb_cgroup(page, parent); 146*da1def55SAneesh Kumar K.V out: 147*da1def55SAneesh Kumar K.V return; 148*da1def55SAneesh Kumar K.V } 149*da1def55SAneesh Kumar K.V 150*da1def55SAneesh Kumar K.V /* 151*da1def55SAneesh Kumar K.V * Force the hugetlb cgroup to empty the hugetlb resources by moving them to 152*da1def55SAneesh Kumar K.V * the parent cgroup. 153*da1def55SAneesh Kumar K.V */ 1542bc64a20SAneesh Kumar K.V static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup) 1552bc64a20SAneesh Kumar K.V { 156*da1def55SAneesh Kumar K.V struct hstate *h; 157*da1def55SAneesh Kumar K.V struct page *page; 158*da1def55SAneesh Kumar K.V int ret = 0, idx = 0; 159*da1def55SAneesh Kumar K.V 160*da1def55SAneesh Kumar K.V do { 161*da1def55SAneesh Kumar K.V if (cgroup_task_count(cgroup) || 162*da1def55SAneesh Kumar K.V !list_empty(&cgroup->children)) { 163*da1def55SAneesh Kumar K.V ret = -EBUSY; 164*da1def55SAneesh Kumar K.V goto out; 165*da1def55SAneesh Kumar K.V } 166*da1def55SAneesh Kumar K.V for_each_hstate(h) { 167*da1def55SAneesh Kumar K.V spin_lock(&hugetlb_lock); 168*da1def55SAneesh Kumar K.V list_for_each_entry(page, &h->hugepage_activelist, lru) 169*da1def55SAneesh Kumar K.V hugetlb_cgroup_move_parent(idx, cgroup, page); 170*da1def55SAneesh Kumar K.V 171*da1def55SAneesh Kumar K.V spin_unlock(&hugetlb_lock); 172*da1def55SAneesh Kumar K.V idx++; 173*da1def55SAneesh Kumar K.V } 174*da1def55SAneesh Kumar K.V cond_resched(); 175*da1def55SAneesh Kumar K.V } while (hugetlb_cgroup_have_usage(cgroup)); 176*da1def55SAneesh Kumar K.V out: 177*da1def55SAneesh Kumar K.V return ret; 1782bc64a20SAneesh Kumar K.V } 1792bc64a20SAneesh Kumar K.V 1806d76dcf4SAneesh Kumar K.V int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 1816d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup **ptr) 1826d76dcf4SAneesh Kumar K.V { 1836d76dcf4SAneesh Kumar K.V int ret = 0; 1846d76dcf4SAneesh Kumar K.V struct res_counter *fail_res; 1856d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg = NULL; 1866d76dcf4SAneesh Kumar K.V unsigned long csize = nr_pages * PAGE_SIZE; 1876d76dcf4SAneesh Kumar K.V 1886d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled()) 1896d76dcf4SAneesh Kumar K.V goto done; 1906d76dcf4SAneesh Kumar K.V /* 1916d76dcf4SAneesh Kumar K.V * We don't charge any cgroup if the compound page have less 1926d76dcf4SAneesh Kumar K.V * than 3 pages. 1936d76dcf4SAneesh Kumar K.V */ 1946d76dcf4SAneesh Kumar K.V if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 1956d76dcf4SAneesh Kumar K.V goto done; 1966d76dcf4SAneesh Kumar K.V again: 1976d76dcf4SAneesh Kumar K.V rcu_read_lock(); 1986d76dcf4SAneesh Kumar K.V h_cg = hugetlb_cgroup_from_task(current); 1996d76dcf4SAneesh Kumar K.V if (!css_tryget(&h_cg->css)) { 2006d76dcf4SAneesh Kumar K.V rcu_read_unlock(); 2016d76dcf4SAneesh Kumar K.V goto again; 2026d76dcf4SAneesh Kumar K.V } 2036d76dcf4SAneesh Kumar K.V rcu_read_unlock(); 2046d76dcf4SAneesh Kumar K.V 2056d76dcf4SAneesh Kumar K.V ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res); 2066d76dcf4SAneesh Kumar K.V css_put(&h_cg->css); 2076d76dcf4SAneesh Kumar K.V done: 2086d76dcf4SAneesh Kumar K.V *ptr = h_cg; 2096d76dcf4SAneesh Kumar K.V return ret; 2106d76dcf4SAneesh Kumar K.V } 2116d76dcf4SAneesh Kumar K.V 2126d76dcf4SAneesh Kumar K.V void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 2136d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg, 2146d76dcf4SAneesh Kumar K.V struct page *page) 2156d76dcf4SAneesh Kumar K.V { 2166d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled() || !h_cg) 2176d76dcf4SAneesh Kumar K.V return; 2186d76dcf4SAneesh Kumar K.V 2196d76dcf4SAneesh Kumar K.V spin_lock(&hugetlb_lock); 2206d76dcf4SAneesh Kumar K.V set_hugetlb_cgroup(page, h_cg); 2216d76dcf4SAneesh Kumar K.V spin_unlock(&hugetlb_lock); 2226d76dcf4SAneesh Kumar K.V return; 2236d76dcf4SAneesh Kumar K.V } 2246d76dcf4SAneesh Kumar K.V 2256d76dcf4SAneesh Kumar K.V /* 2266d76dcf4SAneesh Kumar K.V * Should be called with hugetlb_lock held 2276d76dcf4SAneesh Kumar K.V */ 2286d76dcf4SAneesh Kumar K.V void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 2296d76dcf4SAneesh Kumar K.V struct page *page) 2306d76dcf4SAneesh Kumar K.V { 2316d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg; 2326d76dcf4SAneesh Kumar K.V unsigned long csize = nr_pages * PAGE_SIZE; 2336d76dcf4SAneesh Kumar K.V 2346d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled()) 2356d76dcf4SAneesh Kumar K.V return; 2366d76dcf4SAneesh Kumar K.V VM_BUG_ON(!spin_is_locked(&hugetlb_lock)); 2376d76dcf4SAneesh Kumar K.V h_cg = hugetlb_cgroup_from_page(page); 2386d76dcf4SAneesh Kumar K.V if (unlikely(!h_cg)) 2396d76dcf4SAneesh Kumar K.V return; 2406d76dcf4SAneesh Kumar K.V set_hugetlb_cgroup(page, NULL); 2416d76dcf4SAneesh Kumar K.V res_counter_uncharge(&h_cg->hugepage[idx], csize); 2426d76dcf4SAneesh Kumar K.V return; 2436d76dcf4SAneesh Kumar K.V } 2446d76dcf4SAneesh Kumar K.V 2456d76dcf4SAneesh Kumar K.V void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 2466d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg) 2476d76dcf4SAneesh Kumar K.V { 2486d76dcf4SAneesh Kumar K.V unsigned long csize = nr_pages * PAGE_SIZE; 2496d76dcf4SAneesh Kumar K.V 2506d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled() || !h_cg) 2516d76dcf4SAneesh Kumar K.V return; 2526d76dcf4SAneesh Kumar K.V 2536d76dcf4SAneesh Kumar K.V if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 2546d76dcf4SAneesh Kumar K.V return; 2556d76dcf4SAneesh Kumar K.V 2566d76dcf4SAneesh Kumar K.V res_counter_uncharge(&h_cg->hugepage[idx], csize); 2576d76dcf4SAneesh Kumar K.V return; 2586d76dcf4SAneesh Kumar K.V } 2596d76dcf4SAneesh Kumar K.V 2602bc64a20SAneesh Kumar K.V struct cgroup_subsys hugetlb_subsys = { 2612bc64a20SAneesh Kumar K.V .name = "hugetlb", 2622bc64a20SAneesh Kumar K.V .create = hugetlb_cgroup_create, 2632bc64a20SAneesh Kumar K.V .pre_destroy = hugetlb_cgroup_pre_destroy, 2642bc64a20SAneesh Kumar K.V .destroy = hugetlb_cgroup_destroy, 2652bc64a20SAneesh Kumar K.V .subsys_id = hugetlb_subsys_id, 2662bc64a20SAneesh Kumar K.V }; 267