xref: /linux/tools/testing/selftests/cgroup/test_zswap.c (revision 6aee5aed2edd0a156bf060abce1bdbbc38171c10)
1fe3b1bf1SDomenico Cerasuolo // SPDX-License-Identifier: GPL-2.0
2a97853f2SShuah Khan #define _GNU_SOURCE
3a97853f2SShuah Khan 
4fe3b1bf1SDomenico Cerasuolo #include <linux/limits.h>
5fe3b1bf1SDomenico Cerasuolo #include <unistd.h>
6fe3b1bf1SDomenico Cerasuolo #include <stdio.h>
7a549f9f3SDomenico Cerasuolo #include <signal.h>
8a549f9f3SDomenico Cerasuolo #include <sys/sysinfo.h>
9a549f9f3SDomenico Cerasuolo #include <string.h>
10a549f9f3SDomenico Cerasuolo #include <sys/wait.h>
11a549f9f3SDomenico Cerasuolo #include <sys/mman.h>
12fe3b1bf1SDomenico Cerasuolo 
13fe3b1bf1SDomenico Cerasuolo #include "../kselftest.h"
14fe3b1bf1SDomenico Cerasuolo #include "cgroup_util.h"
15fe3b1bf1SDomenico Cerasuolo 
/*
 * Read one unsigned decimal number from the start of the file at @path and
 * store it in @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or if no number
 * could be parsed from it.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* @value is a size_t *, so the matching conversion is %zu, not %ld. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29a549f9f3SDomenico Cerasuolo 
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters formatted by fprintf() on
 * success and a negative value if the file cannot be opened, formatting
 * fails, or the stream cannot be flushed and closed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t argument. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * The stream is buffered, so the data is only handed to the kernel
	 * when it is flushed; a failing fclose() means the write was lost.
	 */
	if (fclose(file) != 0)
		return -1;
	return ret;
}
42a549f9f3SDomenico Cerasuolo 
/*
 * Fetch the current value of /proc/sys/vm/min_free_kbytes into @value.
 * The return value is passed through from read_int().
 */
static int read_min_free_kb(size_t *value)
{
	int status = read_int("/proc/sys/vm/min_free_kbytes", value);

	return status;
}
47a549f9f3SDomenico Cerasuolo 
/*
 * Fetch the zswap stored_pages counter exposed in debugfs into @value.
 * The return value is passed through from read_int().
 */
static int get_zswap_stored_pages(size_t *value)
{
	int status = read_int("/sys/kernel/debug/zswap/stored_pages", value);

	return status;
}
52a549f9f3SDomenico Cerasuolo 
/* Look up the "zswpwb" entry of memory.stat for the cgroup @cg. */
static long get_cg_wb_count(const char *cg)
{
	long wb_count;

	wb_count = cg_read_key_long(cg, "memory.stat", "zswpwb");
	return wb_count;
}
57d9cfaf40SDomenico Cerasuolo 
/* Look up the "zswpout " entry of memory.stat for the cgroup @cgroup. */
static long get_zswpout(const char *cgroup)
{
	long zswpout;

	zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");
	return zswpout;
}
626479b292SNhat Pham 
/*
 * cg_run() callback: allocate a buffer, write a marker byte at a fixed
 * stride, then read every marker back.
 *
 * @cgroup: unused.
 * @arg:    the allocation size in bytes, smuggled through the pointer value.
 *
 * Returns 0 if every marker reads back intact, -1 on allocation failure or
 * if any marker was corrupted.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/*
	 * The index is a size_t so it cannot overflow for sizes above INT_MAX.
	 * The 4095-byte stride is presumably chosen to stay just below a
	 * 4 KiB page so that no page is skipped.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83b93c28ffSNhat Pham 
/*
 * cg_run() callback: allocate a buffer and write a marker byte at a fixed
 * stride so the memory is actually touched, then release it.
 *
 * @cgroup: unused.
 * @arg:    the allocation size in bytes, smuggled through the pointer value.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96d9cfaf40SDomenico Cerasuolo 
/*
 * Create the cgroup @name under @root and cap its memory.max at 1M.
 *
 * Returns the heap-allocated cgroup path (the caller destroys the group and
 * frees the string), or NULL if any step fails. A group that was created but
 * could not be configured is destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		/* Configuration failed: undo the creation. */
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
114a697dc2bSDomenico Cerasuolo 
115d9cfaf40SDomenico Cerasuolo /*
1166479b292SNhat Pham  * Sanity test to check that pages are written into zswap.
1176479b292SNhat Pham  */
test_zswap_usage(const char * root)1186479b292SNhat Pham static int test_zswap_usage(const char *root)
1196479b292SNhat Pham {
1206479b292SNhat Pham 	long zswpout_before, zswpout_after;
1216479b292SNhat Pham 	int ret = KSFT_FAIL;
1226479b292SNhat Pham 	char *test_group;
1236479b292SNhat Pham 
1246479b292SNhat Pham 	test_group = cg_name(root, "no_shrink_test");
1256479b292SNhat Pham 	if (!test_group)
1266479b292SNhat Pham 		goto out;
1276479b292SNhat Pham 	if (cg_create(test_group))
1286479b292SNhat Pham 		goto out;
1296479b292SNhat Pham 	if (cg_write(test_group, "memory.max", "1M"))
1306479b292SNhat Pham 		goto out;
1316479b292SNhat Pham 
1326479b292SNhat Pham 	zswpout_before = get_zswpout(test_group);
1336479b292SNhat Pham 	if (zswpout_before < 0) {
1346479b292SNhat Pham 		ksft_print_msg("Failed to get zswpout\n");
1356479b292SNhat Pham 		goto out;
1366479b292SNhat Pham 	}
1376479b292SNhat Pham 
1386479b292SNhat Pham 	/* Allocate more than memory.max to push memory into zswap */
1396479b292SNhat Pham 	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
1406479b292SNhat Pham 		goto out;
1416479b292SNhat Pham 
1426479b292SNhat Pham 	/* Verify that pages come into zswap */
1436479b292SNhat Pham 	zswpout_after = get_zswpout(test_group);
1446479b292SNhat Pham 	if (zswpout_after <= zswpout_before) {
1456479b292SNhat Pham 		ksft_print_msg("zswpout does not increase after test program\n");
1466479b292SNhat Pham 		goto out;
1476479b292SNhat Pham 	}
1486479b292SNhat Pham 	ret = KSFT_PASS;
1496479b292SNhat Pham 
1506479b292SNhat Pham out:
1516479b292SNhat Pham 	cg_destroy(test_group);
1526479b292SNhat Pham 	free(test_group);
1536479b292SNhat Pham 	return ret;
1546479b292SNhat Pham }
1556479b292SNhat Pham 
1566479b292SNhat Pham /*
157b93c28ffSNhat Pham  * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158b93c28ffSNhat Pham  * the cgroup.
159b93c28ffSNhat Pham  */
test_swapin_nozswap(const char * root)160b93c28ffSNhat Pham static int test_swapin_nozswap(const char *root)
161b93c28ffSNhat Pham {
162b93c28ffSNhat Pham 	int ret = KSFT_FAIL;
163b93c28ffSNhat Pham 	char *test_group;
164b93c28ffSNhat Pham 	long swap_peak, zswpout;
165b93c28ffSNhat Pham 
166b93c28ffSNhat Pham 	test_group = cg_name(root, "no_zswap_test");
167b93c28ffSNhat Pham 	if (!test_group)
168b93c28ffSNhat Pham 		goto out;
169b93c28ffSNhat Pham 	if (cg_create(test_group))
170b93c28ffSNhat Pham 		goto out;
171b93c28ffSNhat Pham 	if (cg_write(test_group, "memory.max", "8M"))
172b93c28ffSNhat Pham 		goto out;
173b93c28ffSNhat Pham 	if (cg_write(test_group, "memory.zswap.max", "0"))
174b93c28ffSNhat Pham 		goto out;
175b93c28ffSNhat Pham 
176b93c28ffSNhat Pham 	/* Allocate and read more than memory.max to trigger swapin */
177b93c28ffSNhat Pham 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178b93c28ffSNhat Pham 		goto out;
179b93c28ffSNhat Pham 
180b93c28ffSNhat Pham 	/* Verify that pages are swapped out, but no zswap happened */
181b93c28ffSNhat Pham 	swap_peak = cg_read_long(test_group, "memory.swap.peak");
182b93c28ffSNhat Pham 	if (swap_peak < 0) {
183b93c28ffSNhat Pham 		ksft_print_msg("failed to get cgroup's swap_peak\n");
184b93c28ffSNhat Pham 		goto out;
185b93c28ffSNhat Pham 	}
186b93c28ffSNhat Pham 
187b93c28ffSNhat Pham 	if (swap_peak < MB(24)) {
188b93c28ffSNhat Pham 		ksft_print_msg("at least 24MB of memory should be swapped out\n");
189b93c28ffSNhat Pham 		goto out;
190b93c28ffSNhat Pham 	}
191b93c28ffSNhat Pham 
192b93c28ffSNhat Pham 	zswpout = get_zswpout(test_group);
193b93c28ffSNhat Pham 	if (zswpout < 0) {
194b93c28ffSNhat Pham 		ksft_print_msg("failed to get zswpout\n");
195b93c28ffSNhat Pham 		goto out;
196b93c28ffSNhat Pham 	}
197b93c28ffSNhat Pham 
198b93c28ffSNhat Pham 	if (zswpout > 0) {
199b93c28ffSNhat Pham 		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200b93c28ffSNhat Pham 		goto out;
201b93c28ffSNhat Pham 	}
202b93c28ffSNhat Pham 
203b93c28ffSNhat Pham 	ret = KSFT_PASS;
204b93c28ffSNhat Pham 
205b93c28ffSNhat Pham out:
206b93c28ffSNhat Pham 	cg_destroy(test_group);
207b93c28ffSNhat Pham 	free(test_group);
208b93c28ffSNhat Pham 	return ret;
209b93c28ffSNhat Pham }
210b93c28ffSNhat Pham 
211b93c28ffSNhat Pham /* Simple test to verify the (z)swapin code paths */
test_zswapin(const char * root)212b93c28ffSNhat Pham static int test_zswapin(const char *root)
213b93c28ffSNhat Pham {
214b93c28ffSNhat Pham 	int ret = KSFT_FAIL;
215b93c28ffSNhat Pham 	char *test_group;
216b93c28ffSNhat Pham 	long zswpin;
217b93c28ffSNhat Pham 
218b93c28ffSNhat Pham 	test_group = cg_name(root, "zswapin_test");
219b93c28ffSNhat Pham 	if (!test_group)
220b93c28ffSNhat Pham 		goto out;
221b93c28ffSNhat Pham 	if (cg_create(test_group))
222b93c28ffSNhat Pham 		goto out;
223b93c28ffSNhat Pham 	if (cg_write(test_group, "memory.max", "8M"))
224b93c28ffSNhat Pham 		goto out;
225b93c28ffSNhat Pham 	if (cg_write(test_group, "memory.zswap.max", "max"))
226b93c28ffSNhat Pham 		goto out;
227b93c28ffSNhat Pham 
228b93c28ffSNhat Pham 	/* Allocate and read more than memory.max to trigger (z)swap in */
229b93c28ffSNhat Pham 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230b93c28ffSNhat Pham 		goto out;
231b93c28ffSNhat Pham 
232b93c28ffSNhat Pham 	zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233b93c28ffSNhat Pham 	if (zswpin < 0) {
234b93c28ffSNhat Pham 		ksft_print_msg("failed to get zswpin\n");
235b93c28ffSNhat Pham 		goto out;
236b93c28ffSNhat Pham 	}
237b93c28ffSNhat Pham 
238b93c28ffSNhat Pham 	if (zswpin < MB(24) / PAGE_SIZE) {
239b93c28ffSNhat Pham 		ksft_print_msg("at least 24MB should be brought back from zswap\n");
240b93c28ffSNhat Pham 		goto out;
241b93c28ffSNhat Pham 	}
242b93c28ffSNhat Pham 
243b93c28ffSNhat Pham 	ret = KSFT_PASS;
244b93c28ffSNhat Pham 
245b93c28ffSNhat Pham out:
246b93c28ffSNhat Pham 	cg_destroy(test_group);
247b93c28ffSNhat Pham 	free(test_group);
248b93c28ffSNhat Pham 	return ret;
249b93c28ffSNhat Pham }
250b93c28ffSNhat Pham 
/*
 * Attempt writeback with the following steps:
 * 1. Allocate memory.
 * 2. Reclaim memory equal to the amount that was allocated in step 1.
 *    This will move it into zswap.
 * 3. Save current zswap usage.
 * 4. Move the memory allocated in step 1 back in from zswap.
 * 5. Set zswap.max to half the amount that was recorded in step 3.
 * 6. Attempt to reclaim memory equal to the amount that was allocated,
 *    this will either trigger writeback if it's enabled, or reclamation
 *    will fail if writeback is disabled as there isn't enough zswap space.
 *
 * @cgroup: cgroup whose memory.reclaim / memory.zswap.* files are driven.
 * @arg:    pointer to a bool telling whether writeback is enabled.
 *
 * Returns 0 on the expected outcome. With writeback enabled this is the
 * result of the final memory.reclaim write; with writeback disabled it is 0
 * only if that write failed with -EAGAIN. Any earlier failure returns -1.
 */
static int attempt_writeback(const char *cgroup, void *arg)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	size_t memsize = MB(4);
	long zswap_usage;
	bool wb_enabled = *(bool *) arg;
	int ret = -1;
	char *mem;

	/* A failed sysconf() must not be used to size the buffer below. */
	if (pagesize <= 0)
		return ret;

	char buf[pagesize];

	mem = malloc(memsize);
	if (!mem)
		return ret;

	/*
	 * Fill half of each page with increasing data, and keep other
	 * half empty, this will result in data that is still compressible
	 * and ends up in zswap, with material zswap usage.
	 */
	for (long i = 0; i < pagesize; i++)
		buf[i] = i < pagesize/2 ? (char) i : 0;

	for (size_t off = 0; off < memsize; off += pagesize)
		memcpy(&mem[off], buf, pagesize);

	/* Try and reclaim allocated memory */
	if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) {
		ksft_print_msg("Failed to reclaim all of the requested memory\n");
		goto out;
	}

	zswap_usage = cg_read_long(cgroup, "memory.zswap.current");
	/* Don't derive the new zswap.max from a failed read. */
	if (zswap_usage < 0) {
		ksft_print_msg("Failed to read memory.zswap.current\n");
		goto out;
	}

	/* zswpin */
	for (size_t off = 0; off < memsize; off += pagesize) {
		if (memcmp(&mem[off], buf, pagesize)) {
			ksft_print_msg("invalid memory\n");
			goto out;
		}
	}

	if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/2))
		goto out;

	/*
	 * If writeback is enabled, trying to reclaim memory now will trigger a
	 * writeback as zswap.max is half of what was needed when reclaim ran the first time.
	 * If writeback is disabled, memory reclaim will fail as zswap is limited and
	 * it can't writeback to swap.
	 */
	ret = cg_write_numeric(cgroup, "memory.reclaim", memsize);
	if (!wb_enabled)
		ret = (ret == -EAGAIN) ? 0 : -1;

out:
	free(mem);
	return ret;
}
321158863e5SUsama Arif 
test_zswap_writeback_one(const char * cgroup,bool wb)322fd06ce2cSMike Yuan static int test_zswap_writeback_one(const char *cgroup, bool wb)
323fd06ce2cSMike Yuan {
324fd06ce2cSMike Yuan 	long zswpwb_before, zswpwb_after;
325fd06ce2cSMike Yuan 
326fd06ce2cSMike Yuan 	zswpwb_before = get_cg_wb_count(cgroup);
327fd06ce2cSMike Yuan 	if (zswpwb_before != 0) {
328fd06ce2cSMike Yuan 		ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
329fd06ce2cSMike Yuan 		return -1;
330fd06ce2cSMike Yuan 	}
331fd06ce2cSMike Yuan 
332fd06ce2cSMike Yuan 	if (cg_run(cgroup, attempt_writeback, (void *) &wb))
333fd06ce2cSMike Yuan 		return -1;
334fd06ce2cSMike Yuan 
335fd06ce2cSMike Yuan 	/* Verify that zswap writeback occurred only if writeback was enabled */
336fd06ce2cSMike Yuan 	zswpwb_after = get_cg_wb_count(cgroup);
337fd06ce2cSMike Yuan 	if (zswpwb_after < 0)
338fd06ce2cSMike Yuan 		return -1;
339fd06ce2cSMike Yuan 
340fd06ce2cSMike Yuan 	if (wb != !!zswpwb_after) {
341*e07caae7SSebastian Chlad 		ksft_print_msg("zswpwb_after is %ld while wb is %s\n",
342fd06ce2cSMike Yuan 				zswpwb_after, wb ? "enabled" : "disabled");
343fd06ce2cSMike Yuan 		return -1;
344fd06ce2cSMike Yuan 	}
345fd06ce2cSMike Yuan 
346fd06ce2cSMike Yuan 	return 0;
347fd06ce2cSMike Yuan }
348fd06ce2cSMike Yuan 
349158863e5SUsama Arif /* Test to verify the zswap writeback path */
test_zswap_writeback(const char * root,bool wb)350158863e5SUsama Arif static int test_zswap_writeback(const char *root, bool wb)
351158863e5SUsama Arif {
352158863e5SUsama Arif 	int ret = KSFT_FAIL;
353fd06ce2cSMike Yuan 	char *test_group, *test_group_child = NULL;
354fd06ce2cSMike Yuan 
355fd06ce2cSMike Yuan 	if (cg_read_strcmp(root, "memory.zswap.writeback", "1"))
356fd06ce2cSMike Yuan 		return KSFT_SKIP;
357158863e5SUsama Arif 
358158863e5SUsama Arif 	test_group = cg_name(root, "zswap_writeback_test");
359158863e5SUsama Arif 	if (!test_group)
360158863e5SUsama Arif 		goto out;
361158863e5SUsama Arif 	if (cg_create(test_group))
362158863e5SUsama Arif 		goto out;
363158863e5SUsama Arif 	if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
364158863e5SUsama Arif 		goto out;
365158863e5SUsama Arif 
366fd06ce2cSMike Yuan 	if (test_zswap_writeback_one(test_group, wb))
367158863e5SUsama Arif 		goto out;
368158863e5SUsama Arif 
369fd06ce2cSMike Yuan 	/* Reset memory.zswap.max to max (modified by attempt_writeback), and
370fd06ce2cSMike Yuan 	 * set up child cgroup, whose memory.zswap.writeback is hardcoded to 1.
371fd06ce2cSMike Yuan 	 * Thus, the parent's setting shall be what's in effect. */
372fd06ce2cSMike Yuan 	if (cg_write(test_group, "memory.zswap.max", "max"))
373fd06ce2cSMike Yuan 		goto out;
374fd06ce2cSMike Yuan 	if (cg_write(test_group, "cgroup.subtree_control", "+memory"))
375158863e5SUsama Arif 		goto out;
376158863e5SUsama Arif 
377fd06ce2cSMike Yuan 	test_group_child = cg_name(test_group, "zswap_writeback_test_child");
378fd06ce2cSMike Yuan 	if (!test_group_child)
379158863e5SUsama Arif 		goto out;
380fd06ce2cSMike Yuan 	if (cg_create(test_group_child))
381fd06ce2cSMike Yuan 		goto out;
382fd06ce2cSMike Yuan 	if (cg_write(test_group_child, "memory.zswap.writeback", "1"))
383fd06ce2cSMike Yuan 		goto out;
384fd06ce2cSMike Yuan 
385fd06ce2cSMike Yuan 	if (test_zswap_writeback_one(test_group_child, wb))
386fd06ce2cSMike Yuan 		goto out;
387158863e5SUsama Arif 
388158863e5SUsama Arif 	ret = KSFT_PASS;
389158863e5SUsama Arif 
390158863e5SUsama Arif out:
391fd06ce2cSMike Yuan 	if (test_group_child) {
392fd06ce2cSMike Yuan 		cg_destroy(test_group_child);
393fd06ce2cSMike Yuan 		free(test_group_child);
394fd06ce2cSMike Yuan 	}
395158863e5SUsama Arif 	cg_destroy(test_group);
396158863e5SUsama Arif 	free(test_group);
397158863e5SUsama Arif 	return ret;
398158863e5SUsama Arif }
399158863e5SUsama Arif 
/* Table entry point: run the writeback test with writeback switched on. */
static int test_zswap_writeback_enabled(const char *root)
{
	const bool writeback = true;

	return test_zswap_writeback(root, writeback);
}
404158863e5SUsama Arif 
/* Table entry point: run the writeback test with writeback switched off. */
static int test_zswap_writeback_disabled(const char *root)
{
	const bool writeback = false;

	return test_zswap_writeback(root, writeback);
}
409158863e5SUsama Arif 
410158863e5SUsama Arif /*
411d9cfaf40SDomenico Cerasuolo  * When trying to store a memcg page in zswap, if the memcg hits its memory
412a697dc2bSDomenico Cerasuolo  * limit in zswap, writeback should affect only the zswapped pages of that
413a697dc2bSDomenico Cerasuolo  * memcg.
414d9cfaf40SDomenico Cerasuolo  */
test_no_invasive_cgroup_shrink(const char * root)415d9cfaf40SDomenico Cerasuolo static int test_no_invasive_cgroup_shrink(const char *root)
416d9cfaf40SDomenico Cerasuolo {
417d9cfaf40SDomenico Cerasuolo 	int ret = KSFT_FAIL;
418a697dc2bSDomenico Cerasuolo 	size_t control_allocation_size = MB(10);
4198f6d24a5SJohn Hubbard 	char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
420d9cfaf40SDomenico Cerasuolo 
421a697dc2bSDomenico Cerasuolo 	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
422a697dc2bSDomenico Cerasuolo 	if (!wb_group)
423a697dc2bSDomenico Cerasuolo 		return KSFT_FAIL;
424a697dc2bSDomenico Cerasuolo 	if (cg_write(wb_group, "memory.zswap.max", "10K"))
425d9cfaf40SDomenico Cerasuolo 		goto out;
426a697dc2bSDomenico Cerasuolo 	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
427a697dc2bSDomenico Cerasuolo 	if (!control_group)
428d9cfaf40SDomenico Cerasuolo 		goto out;
429d9cfaf40SDomenico Cerasuolo 
430a697dc2bSDomenico Cerasuolo 	/* Push some test_group2 memory into zswap */
431a697dc2bSDomenico Cerasuolo 	if (cg_enter_current(control_group))
432a697dc2bSDomenico Cerasuolo 		goto out;
433a697dc2bSDomenico Cerasuolo 	control_allocation = malloc(control_allocation_size);
434a697dc2bSDomenico Cerasuolo 	for (int i = 0; i < control_allocation_size; i += 4095)
435a697dc2bSDomenico Cerasuolo 		control_allocation[i] = 'a';
436a697dc2bSDomenico Cerasuolo 	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
437d9cfaf40SDomenico Cerasuolo 		goto out;
438d9cfaf40SDomenico Cerasuolo 
439a697dc2bSDomenico Cerasuolo 	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
440a697dc2bSDomenico Cerasuolo 	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
441d9cfaf40SDomenico Cerasuolo 		goto out;
442a697dc2bSDomenico Cerasuolo 
443a697dc2bSDomenico Cerasuolo 	/* Verify that only zswapped memory from gwb_group has been written back */
444a697dc2bSDomenico Cerasuolo 	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
445d9cfaf40SDomenico Cerasuolo 		ret = KSFT_PASS;
446d9cfaf40SDomenico Cerasuolo out:
447a697dc2bSDomenico Cerasuolo 	cg_enter_current(root);
448a697dc2bSDomenico Cerasuolo 	if (control_group) {
449a697dc2bSDomenico Cerasuolo 		cg_destroy(control_group);
450a697dc2bSDomenico Cerasuolo 		free(control_group);
451a697dc2bSDomenico Cerasuolo 	}
452a697dc2bSDomenico Cerasuolo 	cg_destroy(wb_group);
453a697dc2bSDomenico Cerasuolo 	free(wb_group);
454a697dc2bSDomenico Cerasuolo 	if (control_allocation)
455a697dc2bSDomenico Cerasuolo 		free(control_allocation);
456d9cfaf40SDomenico Cerasuolo 	return ret;
457d9cfaf40SDomenico Cerasuolo }
458d9cfaf40SDomenico Cerasuolo 
/*
 * Arguments shared between test_no_kmem_bypass() and the child it spawns.
 * The parent places this in a MAP_SHARED mapping so both sides see updates.
 */
struct no_kmem_bypass_child_args {
	/* Set by the parent: number of bytes the child should allocate. */
	size_t target_alloc_bytes;
	/* Set by the child (used as a flag) once its allocation attempt is over. */
	size_t child_allocated;
};
463a549f9f3SDomenico Cerasuolo 
no_kmem_bypass_child(const char * cgroup,void * arg)464a549f9f3SDomenico Cerasuolo static int no_kmem_bypass_child(const char *cgroup, void *arg)
465a549f9f3SDomenico Cerasuolo {
466a549f9f3SDomenico Cerasuolo 	struct no_kmem_bypass_child_args *values = arg;
467a549f9f3SDomenico Cerasuolo 	void *allocation;
468a549f9f3SDomenico Cerasuolo 
469a549f9f3SDomenico Cerasuolo 	allocation = malloc(values->target_alloc_bytes);
470a549f9f3SDomenico Cerasuolo 	if (!allocation) {
471a549f9f3SDomenico Cerasuolo 		values->child_allocated = true;
472a549f9f3SDomenico Cerasuolo 		return -1;
473a549f9f3SDomenico Cerasuolo 	}
474a549f9f3SDomenico Cerasuolo 	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
475a549f9f3SDomenico Cerasuolo 		((char *)allocation)[i] = 'a';
476a549f9f3SDomenico Cerasuolo 	values->child_allocated = true;
477a549f9f3SDomenico Cerasuolo 	pause();
478a549f9f3SDomenico Cerasuolo 	free(allocation);
479a549f9f3SDomenico Cerasuolo 	return 0;
480a549f9f3SDomenico Cerasuolo }
481a549f9f3SDomenico Cerasuolo 
482a549f9f3SDomenico Cerasuolo /*
483a549f9f3SDomenico Cerasuolo  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
484a549f9f3SDomenico Cerasuolo  * charged to that cgroup. This wasn't the case before commit
485a549f9f3SDomenico Cerasuolo  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
486a549f9f3SDomenico Cerasuolo  *
487a549f9f3SDomenico Cerasuolo  * The test first allocates memory in a memcg, then raises min_free_kbytes to
488a549f9f3SDomenico Cerasuolo  * a very high value so that the allocation falls below low wm, then makes
489a549f9f3SDomenico Cerasuolo  * another allocation to trigger kswapd that should push the memcg-owned pages
490a549f9f3SDomenico Cerasuolo  * to zswap and verifies that the zswap pages are correctly charged.
491a549f9f3SDomenico Cerasuolo  *
492a549f9f3SDomenico Cerasuolo  * To be run on a VM with at most 4G of memory.
493a549f9f3SDomenico Cerasuolo  */
test_no_kmem_bypass(const char * root)494a549f9f3SDomenico Cerasuolo static int test_no_kmem_bypass(const char *root)
495a549f9f3SDomenico Cerasuolo {
496a549f9f3SDomenico Cerasuolo 	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
497a549f9f3SDomenico Cerasuolo 	struct no_kmem_bypass_child_args *values;
498a549f9f3SDomenico Cerasuolo 	size_t trigger_allocation_size;
499a549f9f3SDomenico Cerasuolo 	int wait_child_iteration = 0;
500a549f9f3SDomenico Cerasuolo 	long stored_pages_threshold;
501a549f9f3SDomenico Cerasuolo 	struct sysinfo sys_info;
502a549f9f3SDomenico Cerasuolo 	int ret = KSFT_FAIL;
503a549f9f3SDomenico Cerasuolo 	int child_status;
5048f6d24a5SJohn Hubbard 	char *test_group = NULL;
505a549f9f3SDomenico Cerasuolo 	pid_t child_pid;
506a549f9f3SDomenico Cerasuolo 
507a549f9f3SDomenico Cerasuolo 	/* Read sys info and compute test values accordingly */
508a549f9f3SDomenico Cerasuolo 	if (sysinfo(&sys_info) != 0)
509a549f9f3SDomenico Cerasuolo 		return KSFT_FAIL;
510a549f9f3SDomenico Cerasuolo 	if (sys_info.totalram > 5000000000)
511a549f9f3SDomenico Cerasuolo 		return KSFT_SKIP;
512a549f9f3SDomenico Cerasuolo 	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
513a549f9f3SDomenico Cerasuolo 			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
514a549f9f3SDomenico Cerasuolo 	if (values == MAP_FAILED)
515a549f9f3SDomenico Cerasuolo 		return KSFT_FAIL;
516a549f9f3SDomenico Cerasuolo 	if (read_min_free_kb(&min_free_kb_original))
517a549f9f3SDomenico Cerasuolo 		return KSFT_FAIL;
518a549f9f3SDomenico Cerasuolo 	min_free_kb_high = sys_info.totalram / 2000;
519a549f9f3SDomenico Cerasuolo 	min_free_kb_low = sys_info.totalram / 500000;
520a549f9f3SDomenico Cerasuolo 	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
521a549f9f3SDomenico Cerasuolo 		sys_info.totalram * 5 / 100;
522a549f9f3SDomenico Cerasuolo 	stored_pages_threshold = sys_info.totalram / 5 / 4096;
523a549f9f3SDomenico Cerasuolo 	trigger_allocation_size = sys_info.totalram / 20;
524a549f9f3SDomenico Cerasuolo 
525a549f9f3SDomenico Cerasuolo 	/* Set up test memcg */
526a549f9f3SDomenico Cerasuolo 	test_group = cg_name(root, "kmem_bypass_test");
527a549f9f3SDomenico Cerasuolo 	if (!test_group)
528a549f9f3SDomenico Cerasuolo 		goto out;
529a549f9f3SDomenico Cerasuolo 
530a549f9f3SDomenico Cerasuolo 	/* Spawn memcg child and wait for it to allocate */
531a549f9f3SDomenico Cerasuolo 	set_min_free_kb(min_free_kb_low);
532a549f9f3SDomenico Cerasuolo 	if (cg_create(test_group))
533a549f9f3SDomenico Cerasuolo 		goto out;
534a549f9f3SDomenico Cerasuolo 	values->child_allocated = false;
535a549f9f3SDomenico Cerasuolo 	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
536a549f9f3SDomenico Cerasuolo 	if (child_pid < 0)
537a549f9f3SDomenico Cerasuolo 		goto out;
538a549f9f3SDomenico Cerasuolo 	while (!values->child_allocated && wait_child_iteration++ < 10000)
539a549f9f3SDomenico Cerasuolo 		usleep(1000);
540a549f9f3SDomenico Cerasuolo 
541a549f9f3SDomenico Cerasuolo 	/* Try to wakeup kswapd and let it push child memory to zswap */
542a549f9f3SDomenico Cerasuolo 	set_min_free_kb(min_free_kb_high);
543a549f9f3SDomenico Cerasuolo 	for (int i = 0; i < 20; i++) {
544a549f9f3SDomenico Cerasuolo 		size_t stored_pages;
545a549f9f3SDomenico Cerasuolo 		char *trigger_allocation = malloc(trigger_allocation_size);
546a549f9f3SDomenico Cerasuolo 
547a549f9f3SDomenico Cerasuolo 		if (!trigger_allocation)
548a549f9f3SDomenico Cerasuolo 			break;
549a549f9f3SDomenico Cerasuolo 		for (int i = 0; i < trigger_allocation_size; i += 4095)
550a549f9f3SDomenico Cerasuolo 			trigger_allocation[i] = 'b';
551a549f9f3SDomenico Cerasuolo 		usleep(100000);
552a549f9f3SDomenico Cerasuolo 		free(trigger_allocation);
553a549f9f3SDomenico Cerasuolo 		if (get_zswap_stored_pages(&stored_pages))
554a549f9f3SDomenico Cerasuolo 			break;
555a549f9f3SDomenico Cerasuolo 		if (stored_pages < 0)
556a549f9f3SDomenico Cerasuolo 			break;
557a549f9f3SDomenico Cerasuolo 		/* If memory was pushed to zswap, verify it belongs to memcg */
558a549f9f3SDomenico Cerasuolo 		if (stored_pages > stored_pages_threshold) {
559a549f9f3SDomenico Cerasuolo 			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
560a549f9f3SDomenico Cerasuolo 			int delta = stored_pages * 4096 - zswapped;
561a549f9f3SDomenico Cerasuolo 			int result_ok = delta < stored_pages * 4096 / 4;
562a549f9f3SDomenico Cerasuolo 
563a549f9f3SDomenico Cerasuolo 			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
564a549f9f3SDomenico Cerasuolo 			break;
565a549f9f3SDomenico Cerasuolo 		}
566a549f9f3SDomenico Cerasuolo 	}
567a549f9f3SDomenico Cerasuolo 
568a549f9f3SDomenico Cerasuolo 	kill(child_pid, SIGTERM);
569a549f9f3SDomenico Cerasuolo 	waitpid(child_pid, &child_status, 0);
570a549f9f3SDomenico Cerasuolo out:
571a549f9f3SDomenico Cerasuolo 	set_min_free_kb(min_free_kb_original);
572a549f9f3SDomenico Cerasuolo 	cg_destroy(test_group);
573a549f9f3SDomenico Cerasuolo 	free(test_group);
574a549f9f3SDomenico Cerasuolo 	return ret;
575a549f9f3SDomenico Cerasuolo }
576a549f9f3SDomenico Cerasuolo 
577fe3b1bf1SDomenico Cerasuolo #define T(x) { x, #x }
578fe3b1bf1SDomenico Cerasuolo struct zswap_test {
579fe3b1bf1SDomenico Cerasuolo 	int (*fn)(const char *root);
580fe3b1bf1SDomenico Cerasuolo 	const char *name;
581fe3b1bf1SDomenico Cerasuolo } tests[] = {
5826479b292SNhat Pham 	T(test_zswap_usage),
583b93c28ffSNhat Pham 	T(test_swapin_nozswap),
584b93c28ffSNhat Pham 	T(test_zswapin),
585158863e5SUsama Arif 	T(test_zswap_writeback_enabled),
586158863e5SUsama Arif 	T(test_zswap_writeback_disabled),
587a549f9f3SDomenico Cerasuolo 	T(test_no_kmem_bypass),
588d9cfaf40SDomenico Cerasuolo 	T(test_no_invasive_cgroup_shrink),
589fe3b1bf1SDomenico Cerasuolo };
590fe3b1bf1SDomenico Cerasuolo #undef T
591fe3b1bf1SDomenico Cerasuolo 
/* Report whether /sys/module/zswap exists, i.e. access(F_OK) succeeds on it. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;

	return true;
}
596fe3b1bf1SDomenico Cerasuolo 
main(int argc,char ** argv)597fe3b1bf1SDomenico Cerasuolo int main(int argc, char **argv)
598fe3b1bf1SDomenico Cerasuolo {
599fe3b1bf1SDomenico Cerasuolo 	char root[PATH_MAX];
600fe3b1bf1SDomenico Cerasuolo 	int i, ret = EXIT_SUCCESS;
601fe3b1bf1SDomenico Cerasuolo 
6024793cb59STianchen Ding 	if (cg_find_unified_root(root, sizeof(root), NULL))
603fe3b1bf1SDomenico Cerasuolo 		ksft_exit_skip("cgroup v2 isn't mounted\n");
604fe3b1bf1SDomenico Cerasuolo 
605fe3b1bf1SDomenico Cerasuolo 	if (!zswap_configured())
606fe3b1bf1SDomenico Cerasuolo 		ksft_exit_skip("zswap isn't configured\n");
607fe3b1bf1SDomenico Cerasuolo 
608fe3b1bf1SDomenico Cerasuolo 	/*
609fe3b1bf1SDomenico Cerasuolo 	 * Check that memory controller is available:
610fe3b1bf1SDomenico Cerasuolo 	 * memory is listed in cgroup.controllers
611fe3b1bf1SDomenico Cerasuolo 	 */
612fe3b1bf1SDomenico Cerasuolo 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
613fe3b1bf1SDomenico Cerasuolo 		ksft_exit_skip("memory controller isn't available\n");
614fe3b1bf1SDomenico Cerasuolo 
615fe3b1bf1SDomenico Cerasuolo 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
616fe3b1bf1SDomenico Cerasuolo 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
617fe3b1bf1SDomenico Cerasuolo 			ksft_exit_skip("Failed to set memory controller\n");
618fe3b1bf1SDomenico Cerasuolo 
619fe3b1bf1SDomenico Cerasuolo 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
620fe3b1bf1SDomenico Cerasuolo 		switch (tests[i].fn(root)) {
621fe3b1bf1SDomenico Cerasuolo 		case KSFT_PASS:
622fe3b1bf1SDomenico Cerasuolo 			ksft_test_result_pass("%s\n", tests[i].name);
623fe3b1bf1SDomenico Cerasuolo 			break;
624fe3b1bf1SDomenico Cerasuolo 		case KSFT_SKIP:
625fe3b1bf1SDomenico Cerasuolo 			ksft_test_result_skip("%s\n", tests[i].name);
626fe3b1bf1SDomenico Cerasuolo 			break;
627fe3b1bf1SDomenico Cerasuolo 		default:
628fe3b1bf1SDomenico Cerasuolo 			ret = EXIT_FAILURE;
629fe3b1bf1SDomenico Cerasuolo 			ksft_test_result_fail("%s\n", tests[i].name);
630fe3b1bf1SDomenico Cerasuolo 			break;
631fe3b1bf1SDomenico Cerasuolo 		}
632fe3b1bf1SDomenico Cerasuolo 	}
633fe3b1bf1SDomenico Cerasuolo 
634fe3b1bf1SDomenico Cerasuolo 	return ret;
635fe3b1bf1SDomenico Cerasuolo }
636