1 /*
2 * Copyright (C) 2013-2021 Canonical, Ltd.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * This code is a complete clean re-write of the stress tool by
19 * Colin Ian King <colin.king@canonical.com> and attempts to be
20 * backwardly compatible with the stress tool by Amos Waterland
21 * <apw@rossby.metr.ou.edu> but has more stress tests and more
22 * functionality.
23 *
24 */
25 #include "stress-ng.h"
26
27 static const stress_help_t help[] = {
28 { NULL, "madvise N", "start N workers exercising madvise on memory" },
29 { NULL, "madvise-ops N", "stop after N bogo madvise operations" },
30 { NULL, NULL, NULL }
31 };
32
33 #if defined(HAVE_MADVISE)
34
/* Limits used by the madvise stressor */
#define NUM_MEM_RETRIES_MAX	256	/* failed mmap attempts before giving up */
#define NUM_POISON_MAX		2	/* cap on MADV_HWPOISON requests */
#define NUM_PTHREADS		8	/* threads started per stressor iteration */
#ifdef MADV_SOFT_OFFLINE
#define NUM_SOFT_OFFLINE_MAX	2	/* cap on MADV_SOFT_OFFLINE requests */
#endif
41
42 typedef struct madvise_ctxt {
43 const stress_args_t *args;
44 void *buf;
45 size_t sz;
46 bool is_thread;
47 } madvise_ctxt_t;
48
/* Number of SIGBUS signals caught so far, bumped by the SIGBUS handler */
static uint64_t sigbus_count;
/* Jump target the SIGBUS handler unwinds to with siglongjmp() */
static sigjmp_buf jmp_env;
51
/*
 *  Every madvise advice value the build platform's headers provide;
 *  stress_random_advise() picks entries from this table at random.
 */
static const int madvise_options[] = {
#ifdef MADV_NORMAL
	MADV_NORMAL,
#endif
#ifdef MADV_RANDOM
	MADV_RANDOM,
#endif
#ifdef MADV_SEQUENTIAL
	MADV_SEQUENTIAL,
#endif
#ifdef MADV_WILLNEED
	MADV_WILLNEED,
#endif
#ifdef MADV_DONTNEED
	MADV_DONTNEED,
#endif
#ifdef MADV_REMOVE
	MADV_REMOVE,
#endif
#ifdef MADV_DONTFORK
	MADV_DONTFORK,
#endif
#ifdef MADV_DOFORK
	MADV_DOFORK,
#endif
#ifdef MADV_MERGEABLE
	MADV_MERGEABLE,
#endif
#ifdef MADV_UNMERGEABLE
	MADV_UNMERGEABLE,
#endif
#ifdef MADV_SOFT_OFFLINE
	MADV_SOFT_OFFLINE,
#endif
#ifdef MADV_HUGEPAGE
	MADV_HUGEPAGE,
#endif
#ifdef MADV_NOHUGEPAGE
	MADV_NOHUGEPAGE,
#endif
#ifdef MADV_DONTDUMP
	MADV_DONTDUMP,
#endif
#ifdef MADV_DODUMP
	MADV_DODUMP,
#endif
#ifdef MADV_FREE
	MADV_FREE,
#endif
#ifdef MADV_HWPOISON
	MADV_HWPOISON,
#endif
#ifdef MADV_WIPEONFORK
	MADV_WIPEONFORK,
#endif
#ifdef MADV_KEEPONFORK
	MADV_KEEPONFORK,
#endif
#ifdef MADV_INHERIT_ZERO
	MADV_INHERIT_ZERO,
#endif
#ifdef MADV_COLD
	MADV_COLD,
#endif
#ifdef MADV_PAGEOUT
	MADV_PAGEOUT,
#endif
	/* FreeBSD */
#ifdef MADV_AUTOSYNC
	MADV_AUTOSYNC,
#endif
	/* FreeBSD */
#ifdef MADV_CORE
	MADV_CORE,
#endif
	/* FreeBSD */
#ifdef MADV_PROTECT
	MADV_PROTECT,
#endif
	/* Linux 5.14 */
#ifdef MADV_POPULATE_READ
	MADV_POPULATE_READ,
#endif
	/* Linux 5.14 */
#ifdef MADV_POPULATE_WRITE
	MADV_POPULATE_WRITE,
#endif
	/* OpenBSD */
#ifdef MADV_SPACEAVAIL
	MADV_SPACEAVAIL,
#endif
	/* OS X */
#ifdef MADV_ZERO_WIRED_PAGES
	MADV_ZERO_WIRED_PAGES,
#endif
};
148
149 /*
150 * stress_sigbus_handler()
151 * SIGBUS handler
152 */
stress_sigbus_handler(int signum)153 static void NORETURN MLOCKED_TEXT stress_sigbus_handler(int signum)
154 {
155 (void)signum;
156
157 sigbus_count++;
158
159 siglongjmp(jmp_env, 1);
160 }
161
162 /*
163 * stress_random_advise()
164 * get a random advise option
165 */
stress_random_advise(const stress_args_t * args)166 static int stress_random_advise(const stress_args_t *args)
167 {
168 const int idx = stress_mwc32() % SIZEOF_ARRAY(madvise_options); /* cppcheck-suppress moduloofone */
169 const int advise = madvise_options[idx];
170 #if defined(MADV_HWPOISON) || defined(MADV_SOFT_OFFLINE)
171 static int poison_count;
172 #if defined(MADV_NORMAL)
173 const int madv_normal = MADV_NORMAL;
174 #else
175 const int madv_normal = 0;
176 #endif
177 #endif
178
179 #if defined(MADV_HWPOISON)
180 if (advise == MADV_HWPOISON) {
181 /*
182 * Try for another madvise option if
183 * we've poisoned too many pages.
184 * We really need to use this sparingly
185 * else we run out of free memory
186 */
187 if ((args->instance > 0) ||
188 (poison_count >= NUM_POISON_MAX))
189 return madv_normal;
190 poison_count++;
191 }
192 #else
193 (void)args;
194 #endif
195
196 #if defined(MADV_SOFT_OFFLINE)
197 if (advise == MADV_SOFT_OFFLINE) {
198 static int soft_offline_count;
199
200 /* ..and minimize number of soft offline pages */
201 if ((soft_offline_count >= NUM_SOFT_OFFLINE_MAX) ||
202 (poison_count >= NUM_POISON_MAX))
203 return madv_normal;
204 soft_offline_count++;
205 }
206 #endif
207 return advise;
208 }
209
210 /*
211 * stress_madvise_pages()
212 * exercise madvise settings
213 */
stress_madvise_pages(void * arg)214 static void *stress_madvise_pages(void *arg)
215 {
216 size_t n;
217 const madvise_ctxt_t *ctxt = (const madvise_ctxt_t *)arg;
218 const stress_args_t *args = ctxt->args;
219 void *buf = ctxt->buf;
220 const size_t sz = ctxt->sz;
221 const size_t page_size = args->page_size;
222 static void *nowt = NULL;
223
224 if (ctxt->is_thread) {
225 sigset_t set;
226
227 sigemptyset(&set);
228 sigaddset(&set, SIGBUS);
229
230 (void)pthread_sigmask(SIG_SETMASK, &set, NULL);
231 }
232
233 for (n = 0; n < sz; n += page_size) {
234 const int advise = stress_random_advise(args);
235 void *ptr = (void *)(((uint8_t *)buf) + n);
236
237 (void)shim_madvise(ptr, page_size, advise);
238 (void)shim_msync(ptr, page_size, MS_ASYNC);
239 }
240 for (n = 0; n < sz; n += page_size) {
241 size_t m = (stress_mwc64() % sz) & ~(page_size - 1);
242 const int advise = stress_random_advise(args);
243 void *ptr = (void *)(((uint8_t *)buf) + m);
244
245 (void)shim_madvise(ptr, page_size, advise);
246 (void)shim_msync(ptr, page_size, MS_ASYNC);
247 }
248
249 /*
250 * Exercise a highly likely bad advice option
251 */
252 (void)shim_madvise(buf, page_size, ~0);
253
254 #if defined(MADV_NORMAL)
255 /*
256 * Exercise with non-page aligned address
257 */
258 (void)shim_madvise(((uint8_t *)buf) + 1, page_size, MADV_NORMAL);
259 #endif
260 #if defined(_POSIX_MEMLOCK_RANGE) && \
261 defined(HAVE_MLOCK) && \
262 (defined(MADV_REMOVE) || defined(MADV_DONTNEED))
263 {
264 int ret;
265
266 /*
267 * Exercise MADV_REMOVE on locked page, should
268 * generate EINVAL
269 */
270 ret = shim_mlock(buf, page_size);
271 if (ret == 0) {
272 #if defined(MADV_REMOVE)
273 (void)shim_madvise(buf, page_size, MADV_REMOVE);
274 #endif
275 #if defined(MADV_DONTNEED)
276 (void)shim_madvise(buf, page_size, MADV_DONTNEED);
277 #endif
278 shim_munlock(buf, page_size);
279 }
280 }
281 #endif
282
283 #if defined(MADV_NORMAL)
284 {
285 void *unmapped;
286
287 /*
288 * Exercise an unmapped page
289 */
290 unmapped = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
291 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
292 if (unmapped != MAP_FAILED) {
293 (void)munmap(unmapped, page_size);
294 (void)shim_madvise(unmapped, page_size, MADV_NORMAL);
295 }
296 }
297 #endif
298
299 return &nowt;
300 }
301
/*
 *  stress_process_madvise()
 *	exercise process_madvise on the sz byte region at buf in
 *	process pid, reached through a pidfd opened on that pid.
 *	Valid advice (MADV_PAGEOUT, MADV_COLD where defined) is tried
 *	first, then deliberately invalid advice, flags and pidfd.
 *	All results are ignored; nothing is done with the pidfd if it
 *	cannot be opened, apart from the invalid pidfd call.
 */
static void stress_process_madvise(const pid_t pid, void *buf, const size_t sz)
{
	/*
	 *  ret is initialised because the final (void)ret would
	 *  otherwise read an indeterminate value when no call
	 *  below assigns it (MADV_PAGEOUT undefined and the
	 *  pidfd open failing)
	 */
	int pidfd, ret = 0;
	struct iovec vec;

	vec.iov_base = buf;
	vec.iov_len = sz;

	pidfd = shim_pidfd_open(pid, 0);
	if (pidfd >= 0) {
#if defined(MADV_PAGEOUT)
		ret = shim_process_madvise(pidfd, &vec, 1, MADV_PAGEOUT, 0);
		(void)ret;
#endif
#if defined(MADV_COLD)
		ret = shim_process_madvise(pidfd, &vec, 1, MADV_COLD, 0);
		(void)ret;
#endif

		/* exercise invalid behaviour */
		ret = shim_process_madvise(pidfd, &vec, 1, ~0, 0);
		(void)ret;

#if defined(MADV_PAGEOUT)
		/* exercise invalid flags */
		ret = shim_process_madvise(pidfd, &vec, 1, MADV_PAGEOUT, ~0);
		(void)ret;
#endif

		(void)close(pidfd);
	}

#if defined(MADV_PAGEOUT)
	/* exercise invalid pidfd */
	ret = shim_process_madvise(-1, &vec, 1, MADV_PAGEOUT, 0);
#endif
	(void)ret;
}
342
343 /*
344 * stress_madvise()
345 * stress madvise
346 */
stress_madvise(const stress_args_t * args)347 static int stress_madvise(const stress_args_t *args)
348 {
349 const size_t page_size = args->page_size;
350 const size_t sz = (4 * MB) & ~(page_size - 1);
351 const pid_t pid = getpid();
352 int fd = -1;
353 NOCLOBBER int ret;
354 NOCLOBBER int flags = MAP_PRIVATE;
355 NOCLOBBER int num_mem_retries = 0;
356 char filename[PATH_MAX];
357 char page[page_size];
358 size_t n;
359 madvise_ctxt_t ctxt;
360
361 ret = sigsetjmp(jmp_env, 1);
362 if (ret) {
363 pr_fail("%s: sigsetjmp failed\n", args->name);
364 return EXIT_FAILURE;
365 }
366
367 if (stress_sighandler(args->name, SIGBUS, stress_sigbus_handler, NULL) < 0)
368 return EXIT_FAILURE;
369
370 #if defined(MAP_POPULATE)
371 flags |= MAP_POPULATE;
372 #endif
373
374 /* Make sure this is killable by OOM killer */
375 stress_set_oom_adjustment(args->name, true);
376
377 (void)memset(page, 0xa5, page_size);
378
379 ret = stress_temp_dir_mk_args(args);
380 if (ret < 0)
381 return exit_status(-ret);
382
383 (void)stress_temp_filename_args(args,
384 filename, sizeof(filename), stress_mwc32());
385
386 if ((fd = open(filename, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)) < 0) {
387 ret = exit_status(errno);
388 pr_fail("%s: open %s failed, errno=%d (%s)\n",
389 args->name, filename, errno, strerror(errno));
390 (void)unlink(filename);
391 (void)stress_temp_dir_rm_args(args);
392 return ret;
393 }
394
395 (void)unlink(filename);
396 for (n = 0; n < sz; n += page_size) {
397 ssize_t wret;
398
399 wret = write(fd, page, sizeof(page));
400 (void)wret;
401 }
402
403 stress_set_proc_state(args->name, STRESS_STATE_RUN);
404
405 do {
406 NOCLOBBER void *buf;
407
408 if (num_mem_retries >= NUM_MEM_RETRIES_MAX) {
409 pr_err("%s: gave up trying to mmap, no available memory\n",
410 args->name);
411 break;
412 }
413
414 if (!keep_stressing_flag())
415 break;
416
417 if (stress_mwc1()) {
418 buf = mmap(NULL, sz, PROT_READ | PROT_WRITE, flags, fd, 0);
419 } else {
420 buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
421 flags | MAP_ANONYMOUS, 0, 0);
422 }
423 if (buf == MAP_FAILED) {
424 /* Force MAP_POPULATE off, just in case */
425 #if defined(MAP_POPULATE)
426 flags &= ~MAP_POPULATE;
427 #endif
428 num_mem_retries++;
429 if (num_mem_retries > 1)
430 (void)shim_usleep(100000);
431 continue; /* Try again */
432 }
433 ret = sigsetjmp(jmp_env, 1);
434 if (ret) {
435 (void)munmap((void *)buf, sz);
436 /* Try again */
437 continue;
438 }
439
440 (void)memset(buf, 0xff, sz);
441 (void)stress_madvise_random(buf, sz);
442 (void)stress_mincore_touch_pages(buf, sz);
443 stress_process_madvise(pid, buf, sz);
444
445 ctxt.args = args;
446 ctxt.buf = buf;
447 ctxt.sz = sz;
448
449 #if defined(HAVE_LIB_PTHREAD)
450 {
451 pthread_t pthreads[NUM_PTHREADS];
452 int rets[NUM_PTHREADS];
453 size_t i;
454
455 ctxt.is_thread = true;
456
457 for (i = 0; i < NUM_PTHREADS; i++) {
458 rets[i] = pthread_create(&pthreads[i], NULL,
459 stress_madvise_pages, (void *)&ctxt);
460 }
461 for (i = 0; i < NUM_PTHREADS; i++) {
462 if (rets[i] == 0)
463 (void)pthread_join(pthreads[i], NULL);
464 }
465 }
466 #else
467 {
468 ctxt.is_thread = false;
469 stress_madvise_pages(&ctxt);
470 }
471 #endif
472
473 #if defined(MADV_NORMAL)
474 /* Exercise no-op madvise on 0 size */
475 (void)madvise(buf, 0, MADV_NORMAL);
476
477 /* Invalid size, ENOMEM */
478 (void)madvise(buf, 0xffff0000, MADV_NORMAL);
479
480 /* Invalid advice option, EINVAL */
481 (void)madvise(buf, sz, ~0);
482
483 #endif
484 (void)munmap((void *)buf, sz);
485
486
487 #if defined(MADV_NORMAL)
488 {
489 void *bad_addr = (void *)(~(uintptr_t)0 & ~(page_size -1));
490
491 /* Invalid madvise on unmapped pages */
492 (void)madvise(buf, sz, MADV_NORMAL);
493
494 /* Invalid madvise on wrapped address */
495 (void)madvise(bad_addr, page_size * 2, MADV_NORMAL);
496 }
497 #endif
498
499 inc_counter(args);
500 } while (keep_stressing(args));
501
502 stress_set_proc_state(args->name, STRESS_STATE_DEINIT);
503
504 (void)close(fd);
505 (void)stress_temp_dir_rm_args(args);
506
507 if (sigbus_count)
508 pr_inf("%s: caught %" PRIu64 " SIGBUS signal%s\n",
509 args->name, sigbus_count, sigbus_count == 1 ? "" : "s");
510 return EXIT_SUCCESS;
511 }
512
513 stressor_info_t stress_madvise_info = {
514 .stressor = stress_madvise,
515 .class = CLASS_VM | CLASS_OS,
516 .help = help
517 };
518 #else
519 stressor_info_t stress_madvise_info = {
520 .stressor = stress_not_implemented,
521 .class = CLASS_VM | CLASS_OS,
522 .help = help
523 };
524 #endif
525