1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Collaborative memory management interface.
4 *
5 * Copyright (C) 2008 IBM Corporation
6 * Author(s): Brian King (brking@linux.vnet.ibm.com),
7 */
8
9 #include <linux/ctype.h>
10 #include <linux/delay.h>
11 #include <linux/errno.h>
12 #include <linux/fs.h>
13 #include <linux/gfp.h>
14 #include <linux/kthread.h>
15 #include <linux/module.h>
16 #include <linux/oom.h>
17 #include <linux/reboot.h>
18 #include <linux/sched.h>
19 #include <linux/stringify.h>
20 #include <linux/swap.h>
21 #include <linux/device.h>
22 #include <linux/balloon.h>
23 #include <asm/firmware.h>
24 #include <asm/hvcall.h>
25 #include <asm/mmu.h>
26 #include <linux/uaccess.h>
27 #include <linux/memory.h>
28 #include <asm/plpar_wrappers.h>
29
30 #include "pseries.h"
31
32 #define CMM_DRIVER_VERSION "1.0.0"
33 #define CMM_DEFAULT_DELAY 1
34 #define CMM_HOTPLUG_DELAY 5
35 #define CMM_DEBUG 0
36 #define CMM_DISABLE 0
37 #define CMM_OOM_KB 1024
38 #define CMM_MIN_MEM_MB 256
39 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
40 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
41
42 #define CMM_MEM_HOTPLUG_PRI 1
43
44 static unsigned int delay = CMM_DEFAULT_DELAY;
45 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
46 static unsigned int oom_kb = CMM_OOM_KB;
47 static unsigned int cmm_debug = CMM_DEBUG;
48 static unsigned int cmm_disabled = CMM_DISABLE;
49 static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
50 static bool __read_mostly simulate;
51 static unsigned long simulate_loan_target_kb;
52 static struct device cmm_dev;
53
54 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
55 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
56 MODULE_LICENSE("GPL");
57 MODULE_VERSION(CMM_DRIVER_VERSION);
58
59 module_param_named(delay, delay, uint, 0644);
60 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
61 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
62 module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
63 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
64 "before loaning resumes. "
65 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
66 module_param_named(oom_kb, oom_kb, uint, 0644);
67 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
68 "[Default=" __stringify(CMM_OOM_KB) "]");
69 module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
70 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
71 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
72 module_param_named(debug, cmm_debug, uint, 0644);
73 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
74 "[Default=" __stringify(CMM_DEBUG) "]");
75 module_param_named(simulate, simulate, bool, 0444);
76 MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
77
78 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
79
80 static atomic_long_t loaned_pages;
81 static unsigned long loaned_pages_target;
82 static unsigned long oom_freed_pages;
83
84 static DEFINE_MUTEX(hotplug_mutex);
85 static int hotplug_occurred; /* protected by the hotplug mutex */
86
87 static struct task_struct *cmm_thread_ptr;
88 static struct balloon_dev_info b_dev_info;
89
/*
 * plpar_page_set_loaned - mark one kernel page as loaned to the hypervisor.
 *
 * A kernel PAGE_SIZE page may span several CMO pages, so the H_PAGE_INIT
 * hcall is issued once per CMO sub-page.  Returns 0 on success or the
 * failing hcall's return code.
 */
static long plpar_page_set_loaned(struct page *page)
{
	const unsigned long vpa = page_to_phys(page);
	unsigned long cmo_page_sz = cmo_get_page_size();
	long rc = 0;
	int i;

	/* Simulation mode: no hypervisor to talk to, pretend success. */
	if (unlikely(simulate))
		return 0;

	/* Loan each CMO sub-page; stop at the first failure. */
	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);

	/* On failure, roll back: re-activate the sub-pages already loaned. */
	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
				   vpa + i - cmo_page_sz, 0);

	return rc;
}
109
/*
 * plpar_page_set_active - mark one kernel page as active (un-loaned) again.
 *
 * Mirror of plpar_page_set_loaned(): one H_PAGE_INIT hcall per CMO
 * sub-page, with rollback to the loaned state if any call fails.
 * Returns 0 on success or the failing hcall's return code.
 */
static long plpar_page_set_active(struct page *page)
{
	const unsigned long vpa = page_to_phys(page);
	unsigned long cmo_page_sz = cmo_get_page_size();
	long rc = 0;
	int i;

	/* Simulation mode: no hypervisor to talk to, pretend success. */
	if (unlikely(simulate))
		return 0;

	/* Activate each CMO sub-page; stop at the first failure. */
	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);

	/* On failure, roll back: re-loan the sub-pages already activated. */
	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
				   vpa + i - cmo_page_sz, 0);

	return rc;
}
129
130 /**
131 * cmm_alloc_pages - Allocate pages and mark them as loaned
132 * @nr: number of pages to allocate
133 *
134 * Return value:
135 * number of pages requested to be allocated which were not
136 **/
cmm_alloc_pages(long nr)137 static long cmm_alloc_pages(long nr)
138 {
139 struct page *page;
140 long rc;
141
142 cmm_dbg("Begin request for %ld pages\n", nr);
143
144 while (nr) {
145 /* Exit if a hotplug operation is in progress or occurred */
146 if (mutex_trylock(&hotplug_mutex)) {
147 if (hotplug_occurred) {
148 mutex_unlock(&hotplug_mutex);
149 break;
150 }
151 mutex_unlock(&hotplug_mutex);
152 } else {
153 break;
154 }
155
156 page = balloon_page_alloc();
157 if (!page)
158 break;
159 rc = plpar_page_set_loaned(page);
160 if (rc) {
161 pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
162 __free_page(page);
163 break;
164 }
165
166 balloon_page_enqueue(&b_dev_info, page);
167 atomic_long_inc(&loaned_pages);
168 nr--;
169 }
170
171 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
172 return nr;
173 }
174
175 /**
176 * cmm_free_pages - Free pages and mark them as active
177 * @nr: number of pages to free
178 *
179 * Return value:
180 * number of pages requested to be freed which were not
181 **/
cmm_free_pages(long nr)182 static long cmm_free_pages(long nr)
183 {
184 struct page *page;
185
186 cmm_dbg("Begin free of %ld pages.\n", nr);
187 while (nr) {
188 page = balloon_page_dequeue(&b_dev_info);
189 if (!page)
190 break;
191 plpar_page_set_active(page);
192 __free_page(page);
193 atomic_long_dec(&loaned_pages);
194 nr--;
195 }
196 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
197 return nr;
198 }
199
200 /**
201 * cmm_oom_notify - OOM notifier
202 * @self: notifier block struct
203 * @dummy: not used
204 * @parm: returned - number of pages freed
205 *
206 * Return value:
207 * NOTIFY_OK
208 **/
cmm_oom_notify(struct notifier_block * self,unsigned long dummy,void * parm)209 static int cmm_oom_notify(struct notifier_block *self,
210 unsigned long dummy, void *parm)
211 {
212 unsigned long *freed = parm;
213 long nr = KB2PAGES(oom_kb);
214
215 cmm_dbg("OOM processing started\n");
216 nr = cmm_free_pages(nr);
217 loaned_pages_target = atomic_long_read(&loaned_pages);
218 *freed += KB2PAGES(oom_kb) - nr;
219 oom_freed_pages += KB2PAGES(oom_kb) - nr;
220 cmm_dbg("OOM processing complete\n");
221 return NOTIFY_OK;
222 }
223
/**
 * cmm_get_mpp - Read memory performance parameters
 *
 * Makes hcall to query the current page loan request from the hypervisor.
 * Updates the global loaned_pages_target, clamping it so that at least
 * min_mem_mb of memory stays un-loaned and crediting pages already freed
 * by the OOM notifier.
 *
 * Return value:
 * 	nothing
 **/
static void cmm_get_mpp(void)
{
	const long __loaned_pages = atomic_long_read(&loaned_pages);
	const long total_pages = totalram_pages() + __loaned_pages;
	int rc;
	struct hvcall_mpp_data mpp_data;
	signed long active_pages_target, page_loan_request, target;
	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;

	if (likely(!simulate)) {
		rc = h_get_mpp(&mpp_data);
		if (rc != H_SUCCESS)
			return;
		/* loan_request is in bytes and may be negative (unloan). */
		page_loan_request = div_s64((s64)mpp_data.loan_request,
					    PAGE_SIZE);
		target = page_loan_request + __loaned_pages;
	} else {
		/* Simulation mode: target comes from sysfs, not an hcall. */
		target = KB2PAGES(simulate_loan_target_kb);
		page_loan_request = target - __loaned_pages;
	}

	if (target < 0 || total_pages < min_mem_pages)
		target = 0;

	/* Credit pages the OOM notifier already freed against the target. */
	if (target > oom_freed_pages)
		target -= oom_freed_pages;
	else
		target = 0;

	active_pages_target = total_pages - target;

	/* Never loan so much that active memory drops below min_mem_mb. */
	if (min_mem_pages > active_pages_target)
		target = total_pages - min_mem_pages;

	if (target < 0)
		target = 0;

	loaned_pages_target = target;

	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
		page_loan_request, __loaned_pages, loaned_pages_target,
		oom_freed_pages, totalram_pages());
}
275
/* Frees loaned pages back to the kernel when memory pressure hits OOM. */
static struct notifier_block cmm_oom_nb = {
	.notifier_call = cmm_oom_notify
};
279
/**
 * cmm_thread - CMM task thread
 * @dummy: not used
 *
 * Polls the hypervisor every @delay seconds via cmm_get_mpp() and then
 * inflates or deflates the balloon toward loaned_pages_target.  After a
 * memory hotplug remove, loaning is paused for @hotplug_delay seconds.
 *
 * Return value:
 * 	0
 **/
static int cmm_thread(void *dummy)
{
	unsigned long timeleft;
	long __loaned_pages;

	while (1) {
		timeleft = msleep_interruptible(delay * 1000);

		/* Non-zero timeleft means the sleep was interrupted early. */
		if (kthread_should_stop() || timeleft)
			break;

		if (mutex_trylock(&hotplug_mutex)) {
			if (hotplug_occurred) {
				/* Hotplug finished recently: back off. */
				hotplug_occurred = 0;
				mutex_unlock(&hotplug_mutex);
				cmm_dbg("Hotplug operation has occurred, "
					"loaning activity suspended "
					"for %d seconds.\n",
					hotplug_delay);
				timeleft = msleep_interruptible(hotplug_delay *
								1000);
				if (kthread_should_stop() || timeleft)
					break;
				continue;
			}
			mutex_unlock(&hotplug_mutex);
		} else {
			/* Mutex held by cmm_memory_cb(): offline running. */
			cmm_dbg("Hotplug operation in progress, activity "
				"suspended\n");
			continue;
		}

		cmm_get_mpp();

		/*
		 * Move toward the target.  If inflating falls short,
		 * lower the target to what we actually achieved.
		 */
		__loaned_pages = atomic_long_read(&loaned_pages);
		if (loaned_pages_target > __loaned_pages) {
			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
				loaned_pages_target = __loaned_pages;
		} else if (loaned_pages_target < __loaned_pages)
			cmm_free_pages(__loaned_pages - loaned_pages_target);
	}
	return 0;
}
330
/*
 * CMM_SHOW - generate a read-only sysfs show() function plus its
 * DEVICE_ATTR for a single formatted value.
 */
#define CMM_SHOW(name, format, args...)			\
	static ssize_t show_##name(struct device *dev,	\
				   struct device_attribute *attr,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);	\
	}							\
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* Currently loaned pages and the loan target, both reported in KiB. */
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
342
show_oom_pages(struct device * dev,struct device_attribute * attr,char * buf)343 static ssize_t show_oom_pages(struct device *dev,
344 struct device_attribute *attr, char *buf)
345 {
346 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
347 }
348
store_oom_pages(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)349 static ssize_t store_oom_pages(struct device *dev,
350 struct device_attribute *attr,
351 const char *buf, size_t count)
352 {
353 unsigned long val = simple_strtoul (buf, NULL, 10);
354
355 if (!capable(CAP_SYS_ADMIN))
356 return -EPERM;
357 if (val != 0)
358 return -EBADMSG;
359
360 oom_freed_pages = 0;
361 return count;
362 }
363
/* 0644: root may write "0" to reset the counter (see store_oom_pages()). */
static DEVICE_ATTR(oom_freed_kb, 0644,
		   show_oom_pages, store_oom_pages);

/* Attributes created unconditionally by cmm_sysfs_register(). */
static struct device_attribute *cmm_attrs[] = {
	&dev_attr_loaned_kb,
	&dev_attr_loaned_target_kb,
	&dev_attr_oom_freed_kb,
};

/* Only created in simulation mode (see cmm_sysfs_register()). */
static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
			 simulate_loan_target_kb);

/* Subsystem under which the single "cmm" device is registered. */
static const struct bus_type cmm_subsys = {
	.name = "cmm",
	.dev_name = "cmm",
};
380
/*
 * No-op release callback: cmm_dev is statically allocated, so there is
 * nothing to free when its last reference is dropped.
 */
static void cmm_release_device(struct device *dev)
{
}
384
385 /**
386 * cmm_sysfs_register - Register with sysfs
387 *
388 * Return value:
389 * 0 on success / other on failure
390 **/
cmm_sysfs_register(struct device * dev)391 static int cmm_sysfs_register(struct device *dev)
392 {
393 int i, rc;
394
395 if ((rc = subsys_system_register(&cmm_subsys, NULL)))
396 return rc;
397
398 dev->id = 0;
399 dev->bus = &cmm_subsys;
400 dev->release = cmm_release_device;
401
402 if ((rc = device_register(dev)))
403 goto subsys_unregister;
404
405 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
406 if ((rc = device_create_file(dev, cmm_attrs[i])))
407 goto fail;
408 }
409
410 if (!simulate)
411 return 0;
412 rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
413 if (rc)
414 goto fail;
415 return 0;
416
417 fail:
418 while (--i >= 0)
419 device_remove_file(dev, cmm_attrs[i]);
420 device_unregister(dev);
421 subsys_unregister:
422 bus_unregister(&cmm_subsys);
423 return rc;
424 }
425
426 /**
427 * cmm_unregister_sysfs - Unregister from sysfs
428 *
429 **/
cmm_unregister_sysfs(struct device * dev)430 static void cmm_unregister_sysfs(struct device *dev)
431 {
432 int i;
433
434 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
435 device_remove_file(dev, cmm_attrs[i]);
436 device_unregister(dev);
437 bus_unregister(&cmm_subsys);
438 }
439
440 /**
441 * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
442 *
443 **/
cmm_reboot_notifier(struct notifier_block * nb,unsigned long action,void * unused)444 static int cmm_reboot_notifier(struct notifier_block *nb,
445 unsigned long action, void *unused)
446 {
447 if (action == SYS_RESTART) {
448 if (cmm_thread_ptr)
449 kthread_stop(cmm_thread_ptr);
450 cmm_thread_ptr = NULL;
451 cmm_free_pages(atomic_long_read(&loaned_pages));
452 }
453 return NOTIFY_DONE;
454 }
455
/* Returns all loaned pages to the kernel before a restart. */
static struct notifier_block cmm_reboot_nb = {
	.notifier_call = cmm_reboot_notifier,
};
459
/**
 * cmm_memory_cb - Handle memory hotplug notifier calls
 * @self: notifier block struct
 * @action: action to take
 * @arg: struct memory_notify data for handler
 *
 * Return value:
 * 	NOTIFY_OK or notifier error based on subfunction return value
 *
 **/
static int cmm_memory_cb(struct notifier_block *self,
			unsigned long action, void *arg)
{
	switch (action) {
	case MEM_GOING_OFFLINE:
		/*
		 * Deliberately hold hotplug_mutex across notifier calls:
		 * it is taken here and only released in the MEM_OFFLINE /
		 * MEM_CANCEL_OFFLINE case below.  While held, it blocks
		 * cmm_alloc_pages() and cmm_thread() from loaning pages
		 * during the offline operation.
		 */
		mutex_lock(&hotplug_mutex);
		hotplug_occurred = 1;
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		/* Releases the mutex taken in MEM_GOING_OFFLINE above. */
		mutex_unlock(&hotplug_mutex);
		cmm_dbg("Memory offline operation complete.\n");
		break;
	case MEM_GOING_ONLINE:
	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		/* Adding memory never conflicts with loaning. */
		break;
	}

	return NOTIFY_OK;
}

/* Run before lower-priority callbacks so loaning stops early. */
static struct notifier_block cmm_mem_nb = {
	.notifier_call = cmm_memory_cb,
	.priority = CMM_MEM_HOTPLUG_PRI
};
496
#ifdef CONFIG_BALLOON_MIGRATION
/*
 * cmm_migratepage - balloon migration callback: replace the loaned @page
 * with @newpage by loaning the new page and activating the old one.
 * Returns 0 on success, -EBUSY if the new page could not be loaned.
 */
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	/*
	 * loan/"inflate" the newpage first.
	 *
	 * We might race against the cmm_thread who might discover after our
	 * loan request that another page is to be unloaned. However, once
	 * the cmm_thread runs again later, this error will automatically
	 * be corrected.
	 */
	if (plpar_page_set_loaned(newpage)) {
		/* Unlikely, but possible. Tell the caller not to retry now. */
		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
		return -EBUSY;
	}

	/*
	 * activate/"deflate" the old page. We ignore any errors just like the
	 * other callers.
	 */
	plpar_page_set_active(page);
	return 0;
}
#else /* CONFIG_BALLOON_MIGRATION */
/*
 * Declaration only, no definition: the sole reference is guarded by
 * IS_ENABLED(CONFIG_BALLOON_MIGRATION) in cmm_init(), so the compiler
 * discards it and no symbol is ever needed.
 */
int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage,
		    struct page *page, enum migrate_mode mode);
#endif /* CONFIG_BALLOON_MIGRATION */
527
528 /**
529 * cmm_init - Module initialization
530 *
531 * Return value:
532 * 0 on success / other on failure
533 **/
cmm_init(void)534 static int cmm_init(void)
535 {
536 int rc;
537
538 if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
539 return -EOPNOTSUPP;
540
541 balloon_devinfo_init(&b_dev_info);
542 b_dev_info.adjust_managed_page_count = true;
543 if (IS_ENABLED(CONFIG_BALLOON_MIGRATION))
544 b_dev_info.migratepage = cmm_migratepage;
545
546 rc = register_oom_notifier(&cmm_oom_nb);
547 if (rc < 0)
548 return rc;
549
550 if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
551 goto out_oom_notifier;
552
553 if ((rc = cmm_sysfs_register(&cmm_dev)))
554 goto out_reboot_notifier;
555
556 rc = register_memory_notifier(&cmm_mem_nb);
557 if (rc)
558 goto out_unregister_notifier;
559
560 if (cmm_disabled)
561 return 0;
562
563 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
564 if (IS_ERR(cmm_thread_ptr)) {
565 rc = PTR_ERR(cmm_thread_ptr);
566 goto out_unregister_notifier;
567 }
568
569 return 0;
570 out_unregister_notifier:
571 unregister_memory_notifier(&cmm_mem_nb);
572 cmm_unregister_sysfs(&cmm_dev);
573 out_reboot_notifier:
574 unregister_reboot_notifier(&cmm_reboot_nb);
575 out_oom_notifier:
576 unregister_oom_notifier(&cmm_oom_nb);
577 return rc;
578 }
579
/**
 * cmm_exit - Module exit
 *
 * Return value:
 * 	nothing
 **/
static void cmm_exit(void)
{
	/* Stop the polling thread before tearing anything else down. */
	if (cmm_thread_ptr)
		kthread_stop(cmm_thread_ptr);
	unregister_oom_notifier(&cmm_oom_nb);
	unregister_reboot_notifier(&cmm_reboot_nb);
	unregister_memory_notifier(&cmm_mem_nb);
	/* Return every loaned page to the kernel, then drop sysfs. */
	cmm_free_pages(atomic_long_read(&loaned_pages));
	cmm_unregister_sysfs(&cmm_dev);
}
596
597 /**
598 * cmm_set_disable - Disable/Enable CMM
599 *
600 * Return value:
601 * 0 on success / other on failure
602 **/
cmm_set_disable(const char * val,const struct kernel_param * kp)603 static int cmm_set_disable(const char *val, const struct kernel_param *kp)
604 {
605 int disable = simple_strtoul(val, NULL, 10);
606
607 if (disable != 0 && disable != 1)
608 return -EINVAL;
609
610 if (disable && !cmm_disabled) {
611 if (cmm_thread_ptr)
612 kthread_stop(cmm_thread_ptr);
613 cmm_thread_ptr = NULL;
614 cmm_free_pages(atomic_long_read(&loaned_pages));
615 } else if (!disable && cmm_disabled) {
616 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
617 if (IS_ERR(cmm_thread_ptr))
618 return PTR_ERR(cmm_thread_ptr);
619 }
620
621 cmm_disabled = disable;
622 return 0;
623 }
624
/*
 * "disable" uses a custom setter so flipping it at runtime also
 * starts/stops the CMM thread (see cmm_set_disable()).
 */
module_param_call(disable, cmm_set_disable, param_get_uint,
		  &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
		 "[Default=" __stringify(CMM_DISABLE) "]");

module_init(cmm_init);
module_exit(cmm_exit);
632