1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15 
/*
 * Parse a single decimal number from the start of the file at @path and
 * store it in @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or no number could
 * be parsed. @value is only meaningful when 0 is returned.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t *; %ld would expect a long * */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29 
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the number of characters written (a positive value) on success,
 * -1 if the file cannot be opened or the write fails.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the output, so a rejected write may only surface
	 * when fclose() flushes the stream. fclose() is evaluated first so
	 * the stream is always released.
	 */
	if (fclose(file) != 0 || ret < 0)
		return -1;
	return ret;
}
42 
/*
 * Read /proc/sys/vm/min_free_kbytes into @value.
 * Returns whatever read_int() returns for that file.
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47 
/*
 * Read the zswap debugfs "stored_pages" counter into @value.
 * Returns whatever read_int() returns for that file.
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char counter_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(counter_path, value);
}
52 
/*
 * Read the zswap debugfs "written_back_pages" counter into @value.
 * Returns whatever read_int() returns for that file.
 */
static int get_zswap_written_back_pages(size_t *value)
{
	static const char counter_path[] =
		"/sys/kernel/debug/zswap/written_back_pages";

	return read_int(counter_path, value);
}
57 
/*
 * Look up the "zswpout " key in @cgroup's memory.stat and return the
 * result of cg_read_key_long() unchanged.
 */
static long get_zswpout(const char *cgroup)
{
	long zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return zswpout;
}
62 
/*
 * Callback for cg_run(): allocate a buffer and write one byte every 4095
 * bytes of it, then free it again.
 *
 * @cgroup is unused. @arg carries the allocation size in bytes, cast to a
 * pointer. Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * Index with size_t so the comparison against size is not a mixed
	 * signed/unsigned one and the index cannot overflow for sizes above
	 * INT_MAX.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
75 
76 /*
77  * Sanity test to check that pages are written into zswap.
78  */
79 static int test_zswap_usage(const char *root)
80 {
81 	long zswpout_before, zswpout_after;
82 	int ret = KSFT_FAIL;
83 	char *test_group;
84 
85 	/* Set up */
86 	test_group = cg_name(root, "no_shrink_test");
87 	if (!test_group)
88 		goto out;
89 	if (cg_create(test_group))
90 		goto out;
91 	if (cg_write(test_group, "memory.max", "1M"))
92 		goto out;
93 
94 	zswpout_before = get_zswpout(test_group);
95 	if (zswpout_before < 0) {
96 		ksft_print_msg("Failed to get zswpout\n");
97 		goto out;
98 	}
99 
100 	/* Allocate more than memory.max to push memory into zswap */
101 	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
102 		goto out;
103 
104 	/* Verify that pages come into zswap */
105 	zswpout_after = get_zswpout(test_group);
106 	if (zswpout_after <= zswpout_before) {
107 		ksft_print_msg("zswpout does not increase after test program\n");
108 		goto out;
109 	}
110 	ret = KSFT_PASS;
111 
112 out:
113 	cg_destroy(test_group);
114 	free(test_group);
115 	return ret;
116 }
117 
118 /*
119  * When trying to store a memcg page in zswap, if the memcg hits its memory
120  * limit in zswap, writeback should not be triggered.
121  *
122  * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
123  * not zswap"). Needs to be revised when a per memcg writeback mechanism is
124  * implemented.
125  */
126 static int test_no_invasive_cgroup_shrink(const char *root)
127 {
128 	size_t written_back_before, written_back_after;
129 	int ret = KSFT_FAIL;
130 	char *test_group;
131 
132 	/* Set up */
133 	test_group = cg_name(root, "no_shrink_test");
134 	if (!test_group)
135 		goto out;
136 	if (cg_create(test_group))
137 		goto out;
138 	if (cg_write(test_group, "memory.max", "1M"))
139 		goto out;
140 	if (cg_write(test_group, "memory.zswap.max", "10K"))
141 		goto out;
142 	if (get_zswap_written_back_pages(&written_back_before))
143 		goto out;
144 
145 	/* Allocate 10x memory.max to push memory into zswap */
146 	if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
147 		goto out;
148 
149 	/* Verify that no writeback happened because of the memcg allocation */
150 	if (get_zswap_written_back_pages(&written_back_after))
151 		goto out;
152 	if (written_back_after == written_back_before)
153 		ret = KSFT_PASS;
154 out:
155 	cg_destroy(test_group);
156 	free(test_group);
157 	return ret;
158 }
159 
/*
 * Arguments passed to no_kmem_bypass_child(). test_no_kmem_bypass() places
 * the structure in a MAP_SHARED mapping so that both processes see it.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate */
	size_t target_alloc_bytes;
	/*
	 * Flag set to true by the child once its allocation attempt is over.
	 * It only ever holds true/false, so bool is the matching type.
	 */
	bool child_allocated;
};
164 
165 static int no_kmem_bypass_child(const char *cgroup, void *arg)
166 {
167 	struct no_kmem_bypass_child_args *values = arg;
168 	void *allocation;
169 
170 	allocation = malloc(values->target_alloc_bytes);
171 	if (!allocation) {
172 		values->child_allocated = true;
173 		return -1;
174 	}
175 	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
176 		((char *)allocation)[i] = 'a';
177 	values->child_allocated = true;
178 	pause();
179 	free(allocation);
180 	return 0;
181 }
182 
183 /*
184  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
185  * charged to that cgroup. This wasn't the case before commit
186  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
187  *
188  * The test first allocates memory in a memcg, then raises min_free_kbytes to
189  * a very high value so that the allocation falls below low wm, then makes
190  * another allocation to trigger kswapd that should push the memcg-owned pages
191  * to zswap and verifies that the zswap pages are correctly charged.
192  *
193  * To be run on a VM with at most 4G of memory.
194  */
195 static int test_no_kmem_bypass(const char *root)
196 {
197 	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
198 	struct no_kmem_bypass_child_args *values;
199 	size_t trigger_allocation_size;
200 	int wait_child_iteration = 0;
201 	long stored_pages_threshold;
202 	struct sysinfo sys_info;
203 	int ret = KSFT_FAIL;
204 	int child_status;
205 	char *test_group;
206 	pid_t child_pid;
207 
208 	/* Read sys info and compute test values accordingly */
209 	if (sysinfo(&sys_info) != 0)
210 		return KSFT_FAIL;
211 	if (sys_info.totalram > 5000000000)
212 		return KSFT_SKIP;
213 	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
214 			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
215 	if (values == MAP_FAILED)
216 		return KSFT_FAIL;
217 	if (read_min_free_kb(&min_free_kb_original))
218 		return KSFT_FAIL;
219 	min_free_kb_high = sys_info.totalram / 2000;
220 	min_free_kb_low = sys_info.totalram / 500000;
221 	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
222 		sys_info.totalram * 5 / 100;
223 	stored_pages_threshold = sys_info.totalram / 5 / 4096;
224 	trigger_allocation_size = sys_info.totalram / 20;
225 
226 	/* Set up test memcg */
227 	if (cg_write(root, "cgroup.subtree_control", "+memory"))
228 		goto out;
229 	test_group = cg_name(root, "kmem_bypass_test");
230 	if (!test_group)
231 		goto out;
232 
233 	/* Spawn memcg child and wait for it to allocate */
234 	set_min_free_kb(min_free_kb_low);
235 	if (cg_create(test_group))
236 		goto out;
237 	values->child_allocated = false;
238 	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
239 	if (child_pid < 0)
240 		goto out;
241 	while (!values->child_allocated && wait_child_iteration++ < 10000)
242 		usleep(1000);
243 
244 	/* Try to wakeup kswapd and let it push child memory to zswap */
245 	set_min_free_kb(min_free_kb_high);
246 	for (int i = 0; i < 20; i++) {
247 		size_t stored_pages;
248 		char *trigger_allocation = malloc(trigger_allocation_size);
249 
250 		if (!trigger_allocation)
251 			break;
252 		for (int i = 0; i < trigger_allocation_size; i += 4095)
253 			trigger_allocation[i] = 'b';
254 		usleep(100000);
255 		free(trigger_allocation);
256 		if (get_zswap_stored_pages(&stored_pages))
257 			break;
258 		if (stored_pages < 0)
259 			break;
260 		/* If memory was pushed to zswap, verify it belongs to memcg */
261 		if (stored_pages > stored_pages_threshold) {
262 			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
263 			int delta = stored_pages * 4096 - zswapped;
264 			int result_ok = delta < stored_pages * 4096 / 4;
265 
266 			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
267 			break;
268 		}
269 	}
270 
271 	kill(child_pid, SIGTERM);
272 	waitpid(child_pid, &child_status, 0);
273 out:
274 	set_min_free_kb(min_free_kb_original);
275 	cg_destroy(test_group);
276 	free(test_group);
277 	return ret;
278 }
279 
280 #define T(x) { x, #x }
281 struct zswap_test {
282 	int (*fn)(const char *root);
283 	const char *name;
284 } tests[] = {
285 	T(test_zswap_usage),
286 	T(test_no_kmem_bypass),
287 	T(test_no_invasive_cgroup_shrink),
288 };
289 #undef T
290 
/* Report whether /sys/module/zswap exists. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
295 
296 int main(int argc, char **argv)
297 {
298 	char root[PATH_MAX];
299 	int i, ret = EXIT_SUCCESS;
300 
301 	if (cg_find_unified_root(root, sizeof(root)))
302 		ksft_exit_skip("cgroup v2 isn't mounted\n");
303 
304 	if (!zswap_configured())
305 		ksft_exit_skip("zswap isn't configured\n");
306 
307 	/*
308 	 * Check that memory controller is available:
309 	 * memory is listed in cgroup.controllers
310 	 */
311 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
312 		ksft_exit_skip("memory controller isn't available\n");
313 
314 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
315 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
316 			ksft_exit_skip("Failed to set memory controller\n");
317 
318 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
319 		switch (tests[i].fn(root)) {
320 		case KSFT_PASS:
321 			ksft_test_result_pass("%s\n", tests[i].name);
322 			break;
323 		case KSFT_SKIP:
324 			ksft_test_result_skip("%s\n", tests[i].name);
325 			break;
326 		default:
327 			ret = EXIT_FAILURE;
328 			ksft_test_result_fail("%s\n", tests[i].name);
329 			break;
330 		}
331 	}
332 
333 	return ret;
334 }
335