/* drivers/misc/lowmemorykiller.c
 *
 * The lowmemorykiller driver lets user-space specify a set of memory thresholds
 * where processes with a range of oom_adj values will get killed. Specify the
 * minimum oom_adj values in /sys/module/lowmemorykiller/parameters/adj and the
 * number of free pages in /sys/module/lowmemorykiller/parameters/minfree. Both
 * files take a comma separated list of numbers in ascending order.
 *
 * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
 * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
 * processes with an oom_adj value of 8 or higher when the free memory drops
 * below 4096 pages and kill processes with an oom_adj value of 0 or higher
 * when the free memory drops below 1024 pages.
 *
 * The driver considers memory used for caches to be free, but if a large
 * percentage of the cached memory is locked this can be very inaccurate
 * and processes may not get killed until the normal oom killer is triggered.
 *
 * Copyright (C) 2007-2008 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
31 
32 #include <linux/module.h>
33 #include <linux/kernel.h>
34 #include <linux/mm.h>
35 #include <linux/oom.h>
36 #include <linux/sched.h>
37 #include <linux/profile.h>
38 #include <linux/notifier.h>
39 
/* Verbosity threshold consulted by lowmem_print(); exported as "debug_level". */
static uint32_t lowmem_debug_level = 2;

/*
 * Minimum oom_adj values, one per threshold, in ascending order. Exported
 * as the "adj" array parameter; lowmem_adj_size holds the number of valid
 * entries (at most ARRAY_SIZE(lowmem_adj)).
 */
static int lowmem_adj[6] = {
	0,
	1,
	6,
	12,
};
static int lowmem_adj_size = 4;

/*
 * Free-page thresholds matching lowmem_adj[] entry for entry. Exported as
 * the "minfree" array parameter with the "uint" parameter type, so the
 * element type must be unsigned int: the parameter code strides through the
 * array using sizeof(lowmem_minfree[0]) but reads/writes unsigned int
 * values. (size_t is wider than unsigned int on 64-bit builds.)
 */
static unsigned int lowmem_minfree[6] = {
	3 * 512,	/* 6MB */
	2 * 1024,	/* 8MB */
	4 * 1024,	/* 16MB */
	16 * 1024,	/* 64MB */
};
static int lowmem_minfree_size = 4;

/*
 * Task that was most recently sent SIGKILL and has not yet been reported
 * through the task notifier, plus the jiffies value after which the shrinker
 * stops waiting for it.
 */
static struct task_struct *lowmem_deathpending;
static unsigned long lowmem_deathpending_timeout;
58 
/*
 * lowmem_print - conditional printk()
 *
 * Forwards the remaining arguments to printk() unchanged, but only when
 * lowmem_debug_level is at least @level.
 */
#define lowmem_print(level, ...)				\
	do {							\
		if ((level) <= lowmem_debug_level)		\
			printk(__VA_ARGS__);			\
	} while (0)
64 
65 static int
66 task_notify_func(struct notifier_block *self, unsigned long val, void *data);
67 
68 static struct notifier_block task_nb = {
69 	.notifier_call	= task_notify_func,
70 };
71 
72 static int
task_notify_func(struct notifier_block * self,unsigned long val,void * data)73 task_notify_func(struct notifier_block *self, unsigned long val, void *data)
74 {
75 	struct task_struct *task = data;
76 	if (task == lowmem_deathpending) {
77 		lowmem_deathpending = NULL;
78 		task_handoff_unregister(&task_nb);
79 	}
80 	return NOTIFY_OK;
81 }
82 
lowmem_shrink(struct shrinker * s,struct shrink_control * sc)83 static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
84 {
85 	struct task_struct *p;
86 	struct task_struct *selected = NULL;
87 	int rem = 0;
88 	int tasksize;
89 	int i;
90 	int min_adj = OOM_ADJUST_MAX + 1;
91 	int selected_tasksize = 0;
92 	int selected_oom_adj;
93 	int array_size = ARRAY_SIZE(lowmem_adj);
94 	int other_free = global_page_state(NR_FREE_PAGES);
95 	int other_file = global_page_state(NR_FILE_PAGES) -
96 						global_page_state(NR_SHMEM);
97 
98 	/*
99 	 * If we already have a death outstanding, then
100 	 * bail out right away; indicating to vmscan
101 	 * that we have nothing further to offer on
102 	 * this pass.
103 	 *
104 	 * Note: Currently you need CONFIG_PROFILING
105 	 * for this to work correctly.
106 	 */
107 	if (lowmem_deathpending &&
108 	    time_before_eq(jiffies, lowmem_deathpending_timeout))
109 		return 0;
110 
111 	if (lowmem_adj_size < array_size)
112 		array_size = lowmem_adj_size;
113 	if (lowmem_minfree_size < array_size)
114 		array_size = lowmem_minfree_size;
115 	for (i = 0; i < array_size; i++) {
116 		if (other_free < lowmem_minfree[i] &&
117 		    other_file < lowmem_minfree[i]) {
118 			min_adj = lowmem_adj[i];
119 			break;
120 		}
121 	}
122 	if (sc->nr_to_scan > 0)
123 		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
124 				sc->nr_to_scan, sc->gfp_mask, other_free,
125 				other_file, min_adj);
126 	rem = global_page_state(NR_ACTIVE_ANON) +
127 		global_page_state(NR_ACTIVE_FILE) +
128 		global_page_state(NR_INACTIVE_ANON) +
129 		global_page_state(NR_INACTIVE_FILE);
130 	if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) {
131 		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
132 			     sc->nr_to_scan, sc->gfp_mask, rem);
133 		return rem;
134 	}
135 	selected_oom_adj = min_adj;
136 
137 	read_lock(&tasklist_lock);
138 	for_each_process(p) {
139 		struct mm_struct *mm;
140 		struct signal_struct *sig;
141 		int oom_adj;
142 
143 		task_lock(p);
144 		mm = p->mm;
145 		sig = p->signal;
146 		if (!mm || !sig) {
147 			task_unlock(p);
148 			continue;
149 		}
150 		oom_adj = sig->oom_adj;
151 		if (oom_adj < min_adj) {
152 			task_unlock(p);
153 			continue;
154 		}
155 		tasksize = get_mm_rss(mm);
156 		task_unlock(p);
157 		if (tasksize <= 0)
158 			continue;
159 		if (selected) {
160 			if (oom_adj < selected_oom_adj)
161 				continue;
162 			if (oom_adj == selected_oom_adj &&
163 			    tasksize <= selected_tasksize)
164 				continue;
165 		}
166 		selected = p;
167 		selected_tasksize = tasksize;
168 		selected_oom_adj = oom_adj;
169 		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
170 			     p->pid, p->comm, oom_adj, tasksize);
171 	}
172 	if (selected) {
173 		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
174 			     selected->pid, selected->comm,
175 			     selected_oom_adj, selected_tasksize);
176 		/*
177 		 * If CONFIG_PROFILING is off, then task_handoff_register()
178 		 * is a nop. In that case we don't want to stall the killer
179 		 * by setting lowmem_deathpending.
180 		 */
181 #ifdef CONFIG_PROFILING
182 		lowmem_deathpending = selected;
183 		lowmem_deathpending_timeout = jiffies + HZ;
184 		task_handoff_register(&task_nb);
185 #endif
186 		force_sig(SIGKILL, selected);
187 		rem -= selected_tasksize;
188 	}
189 	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
190 		     sc->nr_to_scan, sc->gfp_mask, rem);
191 	read_unlock(&tasklist_lock);
192 	return rem;
193 }
194 
195 static struct shrinker lowmem_shrinker = {
196 	.shrink = lowmem_shrink,
197 	.seeks = DEFAULT_SEEKS * 16
198 };
199 
lowmem_init(void)200 static int __init lowmem_init(void)
201 {
202 	register_shrinker(&lowmem_shrinker);
203 	return 0;
204 }
205 
lowmem_exit(void)206 static void __exit lowmem_exit(void)
207 {
208 	unregister_shrinker(&lowmem_shrinker);
209 }
210 
211 module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
212 module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size,
213 			 S_IRUGO | S_IWUSR);
214 module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
215 			 S_IRUGO | S_IWUSR);
216 module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
217 
218 module_init(lowmem_init);
219 module_exit(lowmem_exit);
220 
221 MODULE_LICENSE("GPL");
222 
223