1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <linux/oom.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <sys/socket.h>
14 #include <sys/wait.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
17 #include <netdb.h>
18 #include <errno.h>
19 #include <sys/mman.h>
20
21 #include "../kselftest.h"
22 #include "cgroup_util.h"
23
24 static bool has_localevents;
25 static bool has_recursiveprot;
26
/* Open an unlinked, read-write temporary file in the current directory. */
int get_temp_fd(void)
{
	int flags = O_TMPFILE | O_RDWR | O_EXCL;

	return open(".", flags);
}
31
alloc_pagecache(int fd,size_t size)32 int alloc_pagecache(int fd, size_t size)
33 {
34 char buf[PAGE_SIZE];
35 struct stat st;
36 int i;
37
38 if (fstat(fd, &st))
39 goto cleanup;
40
41 size += st.st_size;
42
43 if (ftruncate(fd, size))
44 goto cleanup;
45
46 for (i = 0; i < size; i += sizeof(buf))
47 read(fd, buf, sizeof(buf));
48
49 return 0;
50
51 cleanup:
52 return -1;
53 }
54
alloc_anon(const char * cgroup,void * arg)55 int alloc_anon(const char *cgroup, void *arg)
56 {
57 size_t size = (unsigned long)arg;
58 char *buf, *ptr;
59
60 buf = malloc(size);
61 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
62 *ptr = 0;
63
64 free(buf);
65 return 0;
66 }
67
is_swap_enabled(void)68 int is_swap_enabled(void)
69 {
70 char buf[PAGE_SIZE];
71 const char delim[] = "\n";
72 int cnt = 0;
73 char *line;
74
75 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
76 return -1;
77
78 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
79 cnt++;
80
81 return cnt > 1;
82 }
83
/*
 * Write @score to /proc/<pid>/oom_score_adj.
 *
 * Returns 0 on success, a negative value if the file cannot be opened
 * or written (e.g. the pid does not exist or permission is denied).
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, len;

	/* snprintf() cannot overflow path, unlike unbounded sprintf(). */
	snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	len = dprintf(fd, "%d", score);
	if (len < 0) {
		close(fd);
		return len;
	}

	close(fd);
	return 0;
}
104
105 /*
106 * This test creates two nested cgroups with and without enabling
107 * the memory controller.
108 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	/* With +memory in the parent, the child must list the controller. */
	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	/* Without +memory in subtree_control the child must NOT list it. */
	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

	/*
	 * Two interleaved cleanup chains: the first three labels unwind
	 * the second (controller-less) hierarchy, then execution falls
	 * through into the labels for the first hierarchy.
	 */
cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}
170
alloc_anon_50M_check(const char * cgroup,void * arg)171 static int alloc_anon_50M_check(const char *cgroup, void *arg)
172 {
173 size_t size = MB(50);
174 char *buf, *ptr;
175 long anon, current;
176 int ret = -1;
177
178 buf = malloc(size);
179 if (buf == NULL) {
180 fprintf(stderr, "malloc() failed\n");
181 return -1;
182 }
183
184 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
185 *ptr = 0;
186
187 current = cg_read_long(cgroup, "memory.current");
188 if (current < size)
189 goto cleanup;
190
191 if (!values_close(size, current, 3))
192 goto cleanup;
193
194 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
195 if (anon < 0)
196 goto cleanup;
197
198 if (!values_close(anon, current, 3))
199 goto cleanup;
200
201 ret = 0;
202 cleanup:
203 free(buf);
204 return ret;
205 }
206
/*
 * cg_run() callback: create 50M of pagecache and verify that
 * memory.current and the "file" counter in memory.stat account for it.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	const size_t size = MB(50);
	long usage, file_stat;
	int fd, ret = -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	usage = cg_read_long(cgroup, "memory.current");
	if (usage < size)
		goto cleanup;

	file_stat = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file_stat < 0)
		goto cleanup;

	/* Pagecache accounting is fuzzier than anon, allow 10%. */
	if (!values_close(file_stat, usage, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}
238
239 /*
240 * This test create a memory cgroup, allocates
241 * some anonymous memory and some pagecache
242 * and checks memory.current, memory.peak, and some memory.stat values.
243 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A fresh cgroup must start with zero usage and zero peak. */
	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	/* The 50M allocation must be recorded as the high watermark. */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the free-path
	 * We need at least three to be closed in a different order than writes occurred to test
	 * the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		/* Kernels without memory.peak: skip rather than fail. */
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above*/
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Close fd2 between the writes above and the write to fd4 below,
	 * so the close order differs from the write order. */
	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* peak_fd was reset while usage was low, so its FD-local peak
	 * must still be small (well under the 50M global watermark). */
	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
387
/*
 * cg_run_nowait() callback: populate 50M of pagecache through @arg (a
 * temp file fd) and then linger until the parent test process exits.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int parent = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	/* Stay alive, keeping the memory charged, until the parent dies. */
	while (getppid() == parent)
		sleep(1);

	return 0;
}
401
alloc_anon_noexit(const char * cgroup,void * arg)402 static int alloc_anon_noexit(const char *cgroup, void *arg)
403 {
404 int ppid = getppid();
405 size_t size = (unsigned long)arg;
406 char *buf, *ptr;
407
408 buf = malloc(size);
409 if (buf == NULL) {
410 fprintf(stderr, "malloc() failed\n");
411 return -1;
412 }
413
414 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
415 *ptr = 0;
416
417 while (getppid() == ppid)
418 sleep(1);
419
420 free(buf);
421 return 0;
422 }
423
424 /*
425 * Wait until processes are killed asynchronously by the OOM killer
426 * If we exceed a timeout, fail.
427 */
/*
 * Poll @cgroup (up to ~1 second, 10 x 100ms) waiting for the OOM killer
 * to remove every process from it.  Returns 0 once cgroup.procs is
 * empty, -1 on timeout.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int attempt;

	for (attempt = 0; attempt < 10; attempt++) {
		if (!cg_read_strcmp(cgroup, "cgroup.procs", ""))
			return 0;

		usleep(100000);
	}
	return -1;
}
440
/* Forward declaration: reclaim_until() is defined later in this file. */
static bool reclaim_until(const char *memcg, long goal);
442
443 /*
444 * First, this test creates the following hierarchy:
445 * A memory.min = 0, memory.max = 200M
446 * A/B memory.min = 50M
447 * A/B/C memory.min = 75M, memory.current = 50M
448 * A/B/D memory.min = 25M, memory.current = 50M
449 * A/B/E memory.min = 0, memory.current = 50M
450 * A/B/F memory.min = 500M, memory.current = 0
451 *
452 * (or memory.low if we test soft protection)
453 *
454 * Usages are pagecache and the test keeps a running
455 * process in every leaf cgroup.
456 * Then it creates A/G and creates a significant
457 * memory pressure in A.
458 *
459 * Then it checks actual memory usages and expects that:
460 * A/B memory.current ~= 50M
461 * A/B/C memory.current ~= 29M [memory.events:low > 0]
462 * A/B/D memory.current ~= 21M [memory.events:low > 0]
463 * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot,
464 * undefined otherwise]
465 * A/B/F memory.current = 0 [memory.events:low == 0]
466 * (for origin of the numbers, see model in memcg_protection.m.)
467 *
468 * After that it tries to allocate more than there is
469 * unprotected memory in A available, and checks that:
470 * a) memory.min protects pagecache even in this case,
471 * b) memory.low allows reclaiming page cache with low events.
472 *
473 * Then we try to reclaim from A/B/C using memory.reclaim until its
474 * usage reaches 10M.
475 * This makes sure that:
476 * (a) We ignore the protection of the reclaim target memcg.
477 * (b) The previously calculated emin value (~29M) should be dismissed.
478 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	/* @min selects hard (memory.min) vs soft (memory.low) protection. */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* No swap: reclaim must come out of the pagecache below. */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F in the diagram above) stays empty. */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* Wait (up to ~6s) for the children's 150M of pagecache to land. */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Create memory pressure in the sibling A/G (parent[2]). */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 15))
		goto cleanup;

	if (!values_close(c[1], MB(21), 20))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	/* Over-commit: more than the unprotected memory in A. */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	/* Reclaim target ignores its own protection; see header comment. */
	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/*
	 * Child 2 has memory.low=0, but some low protection may still be
	 * distributed down from its parent with memory.low=50M if cgroup2
	 * memory_recursiveprot mount option is enabled. Ignore the low
	 * event count in this case.
	 */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int ignore_low_events_index = has_recursiveprot ? 2 : -1;
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i == ignore_low_events_index)
			continue;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	/* Destroy children before parents; skip never-created entries. */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
653
/* Hard-protection (memory.min) variant of test_memcg_protection(). */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}
658
/* Soft-protection (memory.low) variant of test_memcg_protection(). */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}
663
/*
 * cg_run() callback: try to create 50M of pagecache in a cgroup whose
 * memory.high or memory.max is set to 30M, and verify that actual
 * usage stays pinned close to 30M.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	const size_t size = MB(50);
	long usage, high, max;
	int fd, ret = -1;

	/* Sanity check: the test requires a 30M high or max limit. */
	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	usage = cg_read_long(cgroup, "memory.current");
	if (!values_close(usage, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;

}
694
695 /*
696 * This test checks that memory.high limits the amount of
697 * memory which can be consumed by either anonymous memory
698 * or pagecache.
699 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* memory.high must default to "max" on a fresh cgroup. */
	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	/* Disable swap so all pressure lands on the memory controller. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* 31M of anon exceeds high but is expected to succeed (rc 0). */
	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	/* 50M of pagecache cannot stay resident under a 30M high limit,
	 * so the 50M accounting check is expected to fail here. */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Breaching memory.high must have been counted in memory.events. */
	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
743
alloc_anon_mlock(const char * cgroup,void * arg)744 static int alloc_anon_mlock(const char *cgroup, void *arg)
745 {
746 size_t size = (size_t)arg;
747 void *buf;
748
749 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
750 0, 0);
751 if (buf == MAP_FAILED)
752 return -1;
753
754 mlock(buf, size);
755 munmap(buf, size);
756 return 0;
757 }
758
759 /*
760 * This test checks that memory.high is able to throttle big single shot
761 * allocation i.e. large allocation within one kernel entry.
762 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Baseline event counts before applying any limits. */
	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	/* Arm the memory.events notification before creating pressure. */
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	/* One 200M mmap+mlock: a single large in-kernel allocation. */
	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/* high events must fire; the max limit must never be breached. */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
819
820 /*
821 * This test checks that memory.max limits the amount of
822 * memory which can be consumed by either anonymous memory
823 * or pagecache.
824 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* memory.max must default to "max" on a fresh cgroup. */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	/* Disable swap so the hard limit cannot be dodged via swapout. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Usage must be non-zero but capped at the 30M hard limit. */
	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* Hitting the limit must have been counted in memory.events. */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
870
871 /*
872 * Reclaim from @memcg until usage reaches @goal by writing to
873 * memory.reclaim.
874 *
875 * This function will return false if the usage is already below the
876 * goal.
877 *
878 * This function assumes that writing to memory.reclaim is the only
879 * source of change in memory.current (no concurrent allocations or
880 * reclaim).
881 *
882 * This function makes sure memory.reclaim is sane. It will return
883 * false if memory.reclaim's error codes do not make sense, even if
884 * the usage goal was satisfied.
885 */
/*
 * Reclaim from @memcg by writing to memory.reclaim until its usage
 * drops to (or close to) @goal, retrying up to 5 times.
 *
 * Returns false if usage is already below @goal on entry, if a write
 * to memory.reclaim reports success without the usage actually
 * dropping, or if it fails with anything other than -EAGAIN.  See the
 * block comment above for the full contract.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	bool reclaimed = false;
	char request[64];
	int attempt, err;
	long usage;

	for (attempt = 0; attempt < 5; attempt++) {
		usage = cg_read_long(memcg, "memory.current");

		if (usage < goal || values_close(usage, goal, 3))
			break;

		/* A previously successful write should have gotten us here. */
		if (reclaimed)
			return false;

		snprintf(request, sizeof(request), "%ld", usage - goal);
		err = cg_write(memcg, "memory.reclaim", request);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}
912
913 /*
914 * This test checks that memory.reclaim reclaims the given
915 * amount of memory (from both anon and file, if possible).
916 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* The new cgroup must start with no charged memory. */
	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}
985
alloc_anon_50M_check_swap(const char * cgroup,void * arg)986 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
987 {
988 long mem_max = (long)arg;
989 size_t size = MB(50);
990 char *buf, *ptr;
991 long mem_current, swap_current;
992 int ret = -1;
993
994 buf = malloc(size);
995 if (buf == NULL) {
996 fprintf(stderr, "malloc() failed\n");
997 return -1;
998 }
999
1000 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
1001 *ptr = 0;
1002
1003 mem_current = cg_read_long(cgroup, "memory.current");
1004 if (!mem_current || !values_close(mem_current, mem_max, 3))
1005 goto cleanup;
1006
1007 swap_current = cg_read_long(cgroup, "memory.swap.current");
1008 if (!swap_current ||
1009 !values_close(mem_current + swap_current, size, 3))
1010 goto cleanup;
1011
1012 ret = 0;
1013 cleanup:
1014 free(buf);
1015 return ret;
1016 }
1017
1018 /*
1019 * This test checks that memory.swap.max limits the amount of
1020 * anonymous memory which can be swapped out. Additionally, it verifies that
1021 * memory.swap.peak reflects the high watermark and can be reset.
1022 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A fresh cgroup must not have swapped anything out yet. */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		/* Kernels without memory.swap.peak: skip, don't fail. */
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	/* Both global and FD-local swap peaks must start at zero. */
	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode*/
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	/* Both limits must default to "max" on a fresh cgroup. */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* Global and FD-local peaks must all record ~30M of pressure. */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* After the reset, the FD-local swap peak must be near zero. */
	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	/* The global watermarks must be unaffected by the FD-local resets. */
	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	/* A failed close() invalidates a pass: the free-path is under test. */
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1216
1217 /*
1218 * This test disables swapping and tries to allocate anonymous memory
1219 * up to OOM. Then it checks for oom and oom_kill events in
1220 * memory.events.
1221 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* No swap: the 100M allocation below must trigger the OOM killer. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	/* The OOM kill makes cg_run() fail, which is the expected outcome. */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* The killed process must be gone from the cgroup... */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	/* ...and exactly one oom and one oom_kill event recorded. */
	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1260
/*
 * Arguments for the tcp_server() callback: @port is the TCP port to
 * listen on; ctl[] is a control channel — the server closes ctl[0] and
 * writes its status to ctl[1], so the parent reads from ctl[0].
 */
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};
1265
/*
 * cg_run callback: IPv6 TCP server.  Binds to the port given in @arg,
 * reports bind status over the control fd, then streams 1M chunks to a
 * single client until the connection is reset.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	/* The parent keeps ctl[0]; the server writes status to ctl[1]. */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Report the bind errno so the parent can diagnose/retry. */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Signal readiness (a zero status word) to the parent. */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	/* Push 1M chunks; success means the client reset the connection. */
	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
1322
/*
 * Client half of the socket accounting test: connect to localhost on
 * the given port and repeatedly read from the socket, checking that
 * the growth of memory.current over the pre-connection baseline stays
 * close to the "sock " counter in memory.stat.
 *
 * Returns KSFT_PASS if the counters converge within the retry budget,
 * KSFT_FAIL or a getaddrinfo()/connect() error code otherwise.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	/* Baseline charge before any socket buffers are accounted. */
	allocated = cg_read_long(cgroup, "memory.current");
	/*
	 * The port is unsigned and can exceed SHRT_MAX (up to 60999), so
	 * it must be formatted with %hu: %hd would print a negative
	 * number, making getaddrinfo() fail or resolve the wrong service.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		/* Each read keeps the server streaming and data charged. */
		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
1373
1374 /*
1375 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat.sock are similar.
1381 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/*
	 * Try a few random ports: the forked server reports its bind(2)
	 * status (0 or errno) back through the control pipe.
	 */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		/* Parent keeps only the read end of the control pipe. */
		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;	/* server bound successfully */
		if (err != EADDRINUSE)
			goto cleanup;

		/* Port collision: reap the failed server and retry. */
		waitpid(pid, NULL, 0);
	}

	/* Every retry hit EADDRINUSE: environment issue, not a failure. */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	/* The server must also have exited cleanly. */
	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* With both ends closed, no socket memory should remain charged. */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1446
1447 /*
1448 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
1450 * processes in the leaf were killed. It also checks that oom_events
1451 * were propagated to the parent level.
1452 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	/* Delegate the memory controller so the child has its own knobs. */
	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	/* No swap: the oversized allocation below must trigger OOM. */
	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	/* Populate parent and child, then overrun the child's 50M limit. */
	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* With memory.oom.group, every process in the leaf must be dead. */
	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
1519
1520 /*
1521 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
1523 * processes in the parent and leaf were killed.
1524 */
test_memcg_oom_group_parent_events(const char * root)1525 static int test_memcg_oom_group_parent_events(const char *root)
1526 {
1527 int ret = KSFT_FAIL;
1528 char *parent, *child;
1529
1530 parent = cg_name(root, "memcg_test_0");
1531 child = cg_name(root, "memcg_test_0/memcg_test_1");
1532
1533 if (!parent || !child)
1534 goto cleanup;
1535
1536 if (cg_create(parent))
1537 goto cleanup;
1538
1539 if (cg_create(child))
1540 goto cleanup;
1541
1542 if (cg_write(parent, "memory.max", "80M"))
1543 goto cleanup;
1544
1545 if (cg_write(parent, "memory.swap.max", "0"))
1546 goto cleanup;
1547
1548 if (cg_write(parent, "memory.oom.group", "1"))
1549 goto cleanup;
1550
1551 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1552 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1553 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1554
1555 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1556 goto cleanup;
1557
1558 if (cg_test_proc_killed(child))
1559 goto cleanup;
1560 if (cg_test_proc_killed(parent))
1561 goto cleanup;
1562
1563 ret = KSFT_PASS;
1564
1565 cleanup:
1566 if (child)
1567 cg_destroy(child);
1568 if (parent)
1569 cg_destroy(parent);
1570 free(child);
1571 free(parent);
1572
1573 return ret;
1574 }
1575
1576 /*
1577 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
1579 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1580 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;	/* background allocator shielded from the OOM killer */

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	/* No swap: the oversized allocation below must trigger OOM. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	/* OOM_SCORE_ADJ_MIN exempts this process from group OOM kills. */
	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/*
	 * Exactly 3 oom_kill events are expected; safe_pid must not be
	 * among the victims (NOTE(review): confirm which 3 processes are
	 * counted — presumably everything in the group except safe_pid).
	 */
	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	/* safe_pid must have survived the group kill; reap it manually. */
	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}
1627
/* T() pairs each test function with its stringified name for reporting. */
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);	/* test body; returns a KSFT_* status */
	const char *name;		/* printable name for ksft output */
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current_peak),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max_peak),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T
1649
main(int argc,char ** argv)1650 int main(int argc, char **argv)
1651 {
1652 char root[PATH_MAX];
1653 int i, proc_status, ret = EXIT_SUCCESS;
1654
1655 if (cg_find_unified_root(root, sizeof(root), NULL))
1656 ksft_exit_skip("cgroup v2 isn't mounted\n");
1657
1658 /*
1659 * Check that memory controller is available:
1660 * memory is listed in cgroup.controllers
1661 */
1662 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1663 ksft_exit_skip("memory controller isn't available\n");
1664
1665 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
1666 if (cg_write(root, "cgroup.subtree_control", "+memory"))
1667 ksft_exit_skip("Failed to set memory controller\n");
1668
1669 proc_status = proc_mount_contains("memory_recursiveprot");
1670 if (proc_status < 0)
1671 ksft_exit_skip("Failed to query cgroup mount option\n");
1672 has_recursiveprot = proc_status;
1673
1674 proc_status = proc_mount_contains("memory_localevents");
1675 if (proc_status < 0)
1676 ksft_exit_skip("Failed to query cgroup mount option\n");
1677 has_localevents = proc_status;
1678
1679 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1680 switch (tests[i].fn(root)) {
1681 case KSFT_PASS:
1682 ksft_test_result_pass("%s\n", tests[i].name);
1683 break;
1684 case KSFT_SKIP:
1685 ksft_test_result_skip("%s\n", tests[i].name);
1686 break;
1687 default:
1688 ret = EXIT_FAILURE;
1689 ksft_test_result_fail("%s\n", tests[i].name);
1690 break;
1691 }
1692 }
1693
1694 return ret;
1695 }
1696