1 /*
2  * Copyright (C) 2013-2021 Canonical, Ltd.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * This code is a complete clean re-write of the stress tool by
19  * Colin Ian King <colin.king@canonical.com> and attempts to be
20  * backwardly compatible with the stress tool by Amos Waterland
21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
22  * functionality.
23  *
24  */
25 #include "stress-ng.h"
26 
/*
 *  Help text for the madvise stressor's command line options.
 *  Each entry pairs an option string with its description; the
 *  table is terminated by an all-NULL entry.
 */
static const stress_help_t help[] = {
	{ NULL,	"madvise N",	 "start N workers exercising madvise on memory" },
	{ NULL,	"madvise-ops N", "stop after N bogo madvise operations" },
	{ NULL,	NULL,		 NULL }
};
32 
33 #if defined(HAVE_MADVISE)
34 
/* Number of failed mmap attempts tolerated before the stressor gives up */
#define NUM_MEM_RETRIES_MAX	(256)
/* Upper limit on how many times MADV_HWPOISON may be handed out */
#define NUM_POISON_MAX		(2)
/* Number of threads started per iteration to run stress_madvise_pages() */
#define NUM_PTHREADS		(8)
#if defined(MADV_SOFT_OFFLINE)
/* Upper limit on how many times MADV_SOFT_OFFLINE may be handed out */
#define NUM_SOFT_OFFLINE_MAX	(2)
#endif
41 
/*
 *  Context handed to stress_madvise_pages(), either directly or
 *  as the pthread start routine argument.
 */
typedef struct madvise_ctxt {
	const stress_args_t *args;	/* stressor arguments (page size, instance, name) */
	void *buf;			/* start of the mapping to exercise */
	size_t sz;			/* size of the mapping in bytes */
	bool  is_thread;		/* true when run as a pthread; SIGBUS is then blocked in that thread */
} madvise_ctxt_t;
48 
/* Context saved by sigsetjmp() in stress_madvise(); the SIGBUS handler jumps back to it */
static sigjmp_buf jmp_env;
/* Number of SIGBUS signals caught by stress_sigbus_handler() */
static uint64_t sigbus_count;
51 
/*
 *  Advice values that stress_random_advise() selects from.
 *  An entry is only compiled in when the platform headers define
 *  the corresponding MADV_* macro, so the contents and length of
 *  this table vary from build to build.
 */
static const int madvise_options[] = {
#if defined(MADV_NORMAL)
	MADV_NORMAL,
#endif
#if defined(MADV_RANDOM)
	MADV_RANDOM,
#endif
#if defined(MADV_SEQUENTIAL)
	MADV_SEQUENTIAL,
#endif
#if defined(MADV_WILLNEED)
	MADV_WILLNEED,
#endif
#if defined(MADV_DONTNEED)
	MADV_DONTNEED,
#endif
#if defined(MADV_REMOVE)
	MADV_REMOVE,
#endif
#if defined(MADV_DONTFORK)
	MADV_DONTFORK,
#endif
#if defined(MADV_DOFORK)
	MADV_DOFORK,
#endif
#if defined(MADV_MERGEABLE)
	MADV_MERGEABLE,
#endif
#if defined(MADV_UNMERGEABLE)
	MADV_UNMERGEABLE,
#endif
#if defined(MADV_SOFT_OFFLINE)
	MADV_SOFT_OFFLINE,
#endif
#if defined(MADV_HUGEPAGE)
	MADV_HUGEPAGE,
#endif
#if defined(MADV_NOHUGEPAGE)
	MADV_NOHUGEPAGE,
#endif
#if defined(MADV_DONTDUMP)
	MADV_DONTDUMP,
#endif
#if defined(MADV_DODUMP)
	MADV_DODUMP,
#endif
#if defined(MADV_FREE)
	MADV_FREE,
#endif
#if defined(MADV_HWPOISON)
	MADV_HWPOISON,
#endif
#if defined(MADV_WIPEONFORK)
	MADV_WIPEONFORK,
#endif
#if defined(MADV_KEEPONFORK)
	MADV_KEEPONFORK,
#endif
#if defined(MADV_INHERIT_ZERO)
	MADV_INHERIT_ZERO,
#endif
#if defined(MADV_COLD)
	MADV_COLD,
#endif
#if defined(MADV_PAGEOUT)
	MADV_PAGEOUT,
#endif
/* FreeBSD */
#if defined(MADV_AUTOSYNC)
	MADV_AUTOSYNC,
#endif
/* FreeBSD */
#if defined(MADV_CORE)
	MADV_CORE,
#endif
/* FreeBSD */
#if defined(MADV_PROTECT)
	MADV_PROTECT,
#endif
/* Linux 5.14 */
#if defined(MADV_POPULATE_READ)
	MADV_POPULATE_READ,
#endif
/* Linux 5.14 */
#if defined(MADV_POPULATE_WRITE)
	MADV_POPULATE_WRITE,
#endif
/* OpenBSD */
#if defined(MADV_SPACEAVAIL)
	MADV_SPACEAVAIL,
#endif
/* OS X */
#if defined(MADV_ZERO_WIRED_PAGES)
	MADV_ZERO_WIRED_PAGES,
#endif
};
148 
149 /*
150  *  stress_sigbus_handler()
151  *     SIGBUS handler
152  */
stress_sigbus_handler(int signum)153 static void NORETURN MLOCKED_TEXT stress_sigbus_handler(int signum)
154 {
155 	(void)signum;
156 
157 	sigbus_count++;
158 
159 	siglongjmp(jmp_env, 1);
160 }
161 
162 /*
163  *  stress_random_advise()
164  *	get a random advise option
165  */
stress_random_advise(const stress_args_t * args)166 static int stress_random_advise(const stress_args_t *args)
167 {
168 	const int idx = stress_mwc32() % SIZEOF_ARRAY(madvise_options);	/* cppcheck-suppress moduloofone */
169 	const int advise = madvise_options[idx];
170 #if defined(MADV_HWPOISON) || defined(MADV_SOFT_OFFLINE)
171 	static int poison_count;
172 #if defined(MADV_NORMAL)
173 	const int madv_normal = MADV_NORMAL;
174 #else
175 	const int madv_normal = 0;
176 #endif
177 #endif
178 
179 #if defined(MADV_HWPOISON)
180 	if (advise == MADV_HWPOISON) {
181 		/*
182 		 * Try for another madvise option if
183 		 * we've poisoned too many pages.
184 		 * We really need to use this sparingly
185 		 * else we run out of free memory
186 		 */
187 		if ((args->instance > 0) ||
188 		    (poison_count >= NUM_POISON_MAX))
189 			return madv_normal;
190 		poison_count++;
191 	}
192 #else
193 	(void)args;
194 #endif
195 
196 #if defined(MADV_SOFT_OFFLINE)
197 	if (advise == MADV_SOFT_OFFLINE) {
198 		static int soft_offline_count;
199 
200 		/* ..and minimize number of soft offline pages */
201 		if ((soft_offline_count >= NUM_SOFT_OFFLINE_MAX) ||
202 		    (poison_count >= NUM_POISON_MAX))
203 			return madv_normal;
204 		soft_offline_count++;
205 	}
206 #endif
207 	return advise;
208 }
209 
210 /*
211  *  stress_madvise_pages()
212  *	exercise madvise settings
213  */
stress_madvise_pages(void * arg)214 static void *stress_madvise_pages(void *arg)
215 {
216 	size_t n;
217 	const madvise_ctxt_t *ctxt = (const madvise_ctxt_t *)arg;
218 	const stress_args_t *args = ctxt->args;
219 	void *buf = ctxt->buf;
220 	const size_t sz = ctxt->sz;
221 	const size_t page_size = args->page_size;
222 	static void *nowt = NULL;
223 
224 	if (ctxt->is_thread) {
225 		sigset_t set;
226 
227 		sigemptyset(&set);
228 		sigaddset(&set, SIGBUS);
229 
230 		(void)pthread_sigmask(SIG_SETMASK, &set, NULL);
231 	}
232 
233 	for (n = 0; n < sz; n += page_size) {
234 		const int advise = stress_random_advise(args);
235 		void *ptr = (void *)(((uint8_t *)buf) + n);
236 
237 		(void)shim_madvise(ptr, page_size, advise);
238 		(void)shim_msync(ptr, page_size, MS_ASYNC);
239 	}
240 	for (n = 0; n < sz; n += page_size) {
241 		size_t m = (stress_mwc64() % sz) & ~(page_size - 1);
242 		const int advise = stress_random_advise(args);
243 		void *ptr = (void *)(((uint8_t *)buf) + m);
244 
245 		(void)shim_madvise(ptr, page_size, advise);
246 		(void)shim_msync(ptr, page_size, MS_ASYNC);
247 	}
248 
249 	/*
250 	 *  Exercise a highly likely bad advice option
251 	 */
252 	(void)shim_madvise(buf, page_size, ~0);
253 
254 #if defined(MADV_NORMAL)
255 	/*
256 	 *  Exercise with non-page aligned address
257 	 */
258 	(void)shim_madvise(((uint8_t *)buf) + 1, page_size, MADV_NORMAL);
259 #endif
260 #if defined(_POSIX_MEMLOCK_RANGE) &&	\
261     defined(HAVE_MLOCK) &&		\
262     (defined(MADV_REMOVE) || defined(MADV_DONTNEED))
263 	{
264 		int ret;
265 
266 		/*
267 		 *  Exercise MADV_REMOVE on locked page, should
268 		 *  generate EINVAL
269 		 */
270 		ret = shim_mlock(buf, page_size);
271 		if (ret == 0) {
272 #if defined(MADV_REMOVE)
273 			(void)shim_madvise(buf, page_size, MADV_REMOVE);
274 #endif
275 #if defined(MADV_DONTNEED)
276 			(void)shim_madvise(buf, page_size, MADV_DONTNEED);
277 #endif
278 			shim_munlock(buf, page_size);
279 		}
280 	}
281 #endif
282 
283 #if defined(MADV_NORMAL)
284 	{
285 		void *unmapped;
286 
287 		/*
288 		 *  Exercise an unmapped page
289 		 */
290 		unmapped = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
291 				MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
292 		if (unmapped != MAP_FAILED) {
293 			(void)munmap(unmapped, page_size);
294 			(void)shim_madvise(unmapped, page_size, MADV_NORMAL);
295 		}
296 	}
297 #endif
298 
299 	return &nowt;
300 }
301 
/*
 *  stress_process_madvise()
 *	exercise shim_process_madvise() on the region buf..buf+sz
 *	through a pidfd opened on process pid. Valid advice is tried
 *	where available, followed by deliberately invalid advice,
 *	invalid flags and an invalid pidfd. Every result is ignored;
 *	the calls are made purely to exercise the interface.
 */
static void stress_process_madvise(const pid_t pid, void *buf, const size_t sz)
{
	int pidfd;
	/*
	 *  Initialised because not every build/path assigns it before
	 *  the final (void)ret, e.g. pidfd open failure without
	 *  MADV_PAGEOUT being defined.
	 */
	int ret = 0;
	struct iovec vec;

	/* a single iovec covering the whole region */
	vec.iov_base = buf;
	vec.iov_len = sz;

	pidfd = shim_pidfd_open(pid, 0);
	if (pidfd >= 0) {
#if defined(MADV_PAGEOUT)
		ret = shim_process_madvise(pidfd, &vec, 1, MADV_PAGEOUT, 0);
		(void)ret;
#endif
#if defined(MADV_COLD)
		ret = shim_process_madvise(pidfd, &vec, 1, MADV_COLD, 0);
		(void)ret;
#endif

		/* exercise invalid behaviour */
		ret = shim_process_madvise(pidfd, &vec, 1, ~0, 0);
		(void)ret;

#if defined(MADV_PAGEOUT)
		/* exercise invalid flags */
		ret = shim_process_madvise(pidfd, &vec, 1, MADV_PAGEOUT, ~0);
		(void)ret;
#endif

		(void)close(pidfd);
	}

#if defined(MADV_PAGEOUT)
	/* exercise invalid pidfd */
	ret = shim_process_madvise(-1, &vec, 1, MADV_PAGEOUT, 0);
#endif
	(void)ret;
}
342 
343 /*
344  *  stress_madvise()
345  *	stress madvise
346  */
stress_madvise(const stress_args_t * args)347 static int stress_madvise(const stress_args_t *args)
348 {
349 	const size_t page_size = args->page_size;
350 	const size_t sz = (4 *  MB) & ~(page_size - 1);
351 	const pid_t pid = getpid();
352 	int fd = -1;
353 	NOCLOBBER int ret;
354 	NOCLOBBER int flags = MAP_PRIVATE;
355 	NOCLOBBER int num_mem_retries = 0;
356 	char filename[PATH_MAX];
357 	char page[page_size];
358 	size_t n;
359 	madvise_ctxt_t ctxt;
360 
361 	ret = sigsetjmp(jmp_env, 1);
362 	if (ret) {
363 		pr_fail("%s: sigsetjmp failed\n", args->name);
364 		return EXIT_FAILURE;
365 	}
366 
367 	if (stress_sighandler(args->name, SIGBUS, stress_sigbus_handler, NULL) < 0)
368 		return EXIT_FAILURE;
369 
370 #if defined(MAP_POPULATE)
371 	flags |= MAP_POPULATE;
372 #endif
373 
374 	/* Make sure this is killable by OOM killer */
375 	stress_set_oom_adjustment(args->name, true);
376 
377 	(void)memset(page, 0xa5, page_size);
378 
379 	ret = stress_temp_dir_mk_args(args);
380 	if (ret < 0)
381 		return exit_status(-ret);
382 
383 	(void)stress_temp_filename_args(args,
384 		filename, sizeof(filename), stress_mwc32());
385 
386 	if ((fd = open(filename, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)) < 0) {
387 		ret = exit_status(errno);
388 		pr_fail("%s: open %s failed, errno=%d (%s)\n",
389 			args->name, filename, errno, strerror(errno));
390 		(void)unlink(filename);
391 		(void)stress_temp_dir_rm_args(args);
392 		return ret;
393 	}
394 
395 	(void)unlink(filename);
396 	for (n = 0; n < sz; n += page_size) {
397 		ssize_t wret;
398 
399 		wret = write(fd, page, sizeof(page));
400 		(void)wret;
401 	}
402 
403 	stress_set_proc_state(args->name, STRESS_STATE_RUN);
404 
405 	do {
406 		NOCLOBBER void *buf;
407 
408 		if (num_mem_retries >= NUM_MEM_RETRIES_MAX) {
409 			pr_err("%s: gave up trying to mmap, no available memory\n",
410 				args->name);
411 			break;
412 		}
413 
414 		if (!keep_stressing_flag())
415 			break;
416 
417 		if (stress_mwc1()) {
418 			buf = mmap(NULL, sz, PROT_READ | PROT_WRITE, flags, fd, 0);
419 		} else {
420 			buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
421 				flags | MAP_ANONYMOUS, 0, 0);
422 		}
423 		if (buf == MAP_FAILED) {
424 			/* Force MAP_POPULATE off, just in case */
425 #if defined(MAP_POPULATE)
426 			flags &= ~MAP_POPULATE;
427 #endif
428 			num_mem_retries++;
429 			if (num_mem_retries > 1)
430 				(void)shim_usleep(100000);
431 			continue;	/* Try again */
432 		}
433 		ret = sigsetjmp(jmp_env, 1);
434 		if (ret) {
435 			(void)munmap((void *)buf, sz);
436 			/* Try again */
437 			continue;
438 		}
439 
440 		(void)memset(buf, 0xff, sz);
441 		(void)stress_madvise_random(buf, sz);
442 		(void)stress_mincore_touch_pages(buf, sz);
443 		stress_process_madvise(pid, buf, sz);
444 
445 		ctxt.args = args;
446 		ctxt.buf = buf;
447 		ctxt.sz = sz;
448 
449 #if defined(HAVE_LIB_PTHREAD)
450 		{
451 			pthread_t pthreads[NUM_PTHREADS];
452 			int rets[NUM_PTHREADS];
453 			size_t i;
454 
455 			ctxt.is_thread = true;
456 
457 			for (i = 0; i < NUM_PTHREADS; i++) {
458 				rets[i] = pthread_create(&pthreads[i], NULL,
459 						stress_madvise_pages, (void *)&ctxt);
460 			}
461 			for (i = 0; i < NUM_PTHREADS; i++) {
462 				if (rets[i] == 0)
463 					(void)pthread_join(pthreads[i], NULL);
464 			}
465 		}
466 #else
467 		{
468 			ctxt.is_thread = false;
469 			stress_madvise_pages(&ctxt);
470 		}
471 #endif
472 
473 #if defined(MADV_NORMAL)
474 		/* Exercise no-op madvise on 0 size */
475 		(void)madvise(buf, 0, MADV_NORMAL);
476 
477 		/* Invalid size, ENOMEM */
478 		(void)madvise(buf, 0xffff0000, MADV_NORMAL);
479 
480 		/* Invalid advice option, EINVAL */
481 		(void)madvise(buf, sz, ~0);
482 
483 #endif
484 		(void)munmap((void *)buf, sz);
485 
486 
487 #if defined(MADV_NORMAL)
488 		{
489 			void *bad_addr = (void *)(~(uintptr_t)0 & ~(page_size -1));
490 
491 			/* Invalid madvise on unmapped pages */
492 			(void)madvise(buf, sz, MADV_NORMAL);
493 
494 			/* Invalid madvise on wrapped address */
495 			(void)madvise(bad_addr, page_size * 2, MADV_NORMAL);
496 		}
497 #endif
498 
499 		inc_counter(args);
500 	} while (keep_stressing(args));
501 
502 	stress_set_proc_state(args->name, STRESS_STATE_DEINIT);
503 
504 	(void)close(fd);
505 	(void)stress_temp_dir_rm_args(args);
506 
507 	if (sigbus_count)
508 		pr_inf("%s: caught %" PRIu64 " SIGBUS signal%s\n",
509 			args->name, sigbus_count, sigbus_count == 1 ? "" : "s");
510 	return EXIT_SUCCESS;
511 }
512 
/*
 *  Stressor descriptor used when HAVE_MADVISE is defined:
 *  stress_madvise() is the entry point.
 */
stressor_info_t stress_madvise_info = {
	.stressor = stress_madvise,
	.class = CLASS_VM | CLASS_OS,
	.help = help
};
518 #else
/*
 *  Stressor descriptor used when HAVE_MADVISE is not defined:
 *  the entry point is stress_not_implemented, while the class
 *  and help text stay the same.
 */
stressor_info_t stress_madvise_info = {
	.stressor = stress_not_implemented,
	.class = CLASS_VM | CLASS_OS,
	.help = help
};
524 #endif
525