1 /*
2 * Copyright (C) 2016-2021 Canonical, Ltd.
3 * Copyright 2021 Colin Ian King
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 *
19 */
20 #include "stress-ng.h"
21
22 static const stress_help_t help[] = {
23 { NULL, "memrate N", "start N workers exercised memory read/writes" },
24 { NULL, "memrate-ops N", "stop after N memrate bogo operations" },
25 { NULL, "memrate-bytes N", "size of memory buffer being exercised" },
26 { NULL, "memrate-rd-mbs N", "read rate from buffer in megabytes per second" },
27 { NULL, "memrate-wr-mbs N", "write rate to buffer in megabytes per second" },
28 { NULL, NULL, NULL }
29 };
30
/*
 *  Prototype of a memory rate exercising function: accesses the buffer
 *  [start, end), throttled to rd_mbs (reads) or wr_mbs (writes) MB/sec,
 *  and returns the number of KBytes accessed.  *valid is set false when
 *  the function cannot run on this CPU (e.g. missing SSE/SSE2).
 */
typedef uint64_t (*stress_memrate_func_t)(void *start, void *end, uint64_t rd_mbs, uint64_t wr_mbs, bool *valid);

/* name + function pair describing one read/write exerciser */
typedef struct {
	const char *name;
	stress_memrate_func_t func;
} stress_memrate_info_t;

/* per-exerciser accumulated statistics, shared with the oomable child */
typedef struct {
	double duration;	/* total seconds spent in the exerciser */
	double kbytes;		/* total KBytes accessed */
	bool valid;		/* true once the exerciser has run successfully */
} stress_memrate_stats_t;

/* settings + stats handed to the oomable child process */
typedef struct {
	stress_memrate_stats_t *stats;	/* MAP_SHARED stats array */
	uint64_t memrate_bytes;		/* size of buffer to exercise */
	uint64_t memrate_rd_mbs;	/* read rate cap in MB/sec */
	uint64_t memrate_wr_mbs;	/* write rate cap in MB/sec */
} stress_memrate_context_t;
50
stress_set_memrate_bytes(const char * opt)51 static int stress_set_memrate_bytes(const char *opt)
52 {
53 uint64_t memrate_bytes;
54
55 memrate_bytes = stress_get_uint64_byte(opt);
56 stress_check_range_bytes("memrate-bytes", memrate_bytes,
57 MIN_MEMRATE_BYTES, MAX_MEMRATE_BYTES);
58 return stress_set_setting("memrate-bytes", TYPE_ID_UINT64, &memrate_bytes);
59 }
60
stress_set_memrate_rd_mbs(const char * opt)61 static int stress_set_memrate_rd_mbs(const char *opt)
62 {
63 uint64_t memrate_rd_mbs;
64
65 memrate_rd_mbs = stress_get_uint64(opt);
66 stress_check_range_bytes("memrate-rd-mbs", memrate_rd_mbs,
67 1, 1000000);
68 return stress_set_setting("memrate-rd-mbs", TYPE_ID_UINT64, &memrate_rd_mbs);
69 }
70
stress_set_memrate_wr_mbs(const char * opt)71 static int stress_set_memrate_wr_mbs(const char *opt)
72 {
73 uint64_t memrate_wr_mbs;
74
75 memrate_wr_mbs = stress_get_uint64(opt);
76 stress_check_range_bytes("memrate-wr-mbs", memrate_wr_mbs,
77 1, 1000000);
78 return stress_set_setting("memrate-wr-mbs", TYPE_ID_UINT64, &memrate_wr_mbs);
79 }
80
/* pass a brace-enclosed initializer through as a single macro argument */
#define SINGLE_ARG(...) __VA_ARGS__

/*
 *  STRESS_MEMRATE_READ(size, type)
 *	generate a rate-limited read exerciser stress_memrate_read<size>
 *	that reads the buffer [start, end) using loads of 'type'.
 *
 *	The inner loop reads 8 elements per iteration, i.e. 8 * sizeof(type)
 *	= 'size' bytes (size is the type width in bits), so 'i' counts bytes
 *	and each inner loop pass covers ~1 MB.  After each MB the expected
 *	cumulative duration (1/rd_mbs sec per MB) is compared against the
 *	elapsed time and the surplus is slept off with nanosleep() to hold
 *	the read rate at rd_mbs MB/sec.  Returns KBytes read.
 */
#define STRESS_MEMRATE_READ(size, type)				\
static uint64_t stress_memrate_read##size(			\
	void *start,						\
	void *end,						\
	uint64_t rd_mbs,					\
	uint64_t wr_mbs,					\
	bool *valid)						\
{								\
	register volatile type *ptr;				\
	double t1;						\
	const double dur = 1.0 / (double)rd_mbs;		\
	double total_dur = 0.0;					\
								\
	(void)wr_mbs;						\
								\
	t1 = stress_time_now();					\
	for (ptr = start; ptr < (type *)end;) {			\
		double t2, dur_remainder;			\
		uint32_t i;					\
								\
		if (!keep_stressing_flag())			\
			break;					\
		for (i = 0; (i < (uint32_t)MB) &&		\
		     (ptr < (type *)end);			\
		     ptr += 8, i += size) {			\
			(void)(ptr[0]);				\
			(void)(ptr[1]);				\
			(void)(ptr[2]);				\
			(void)(ptr[3]);				\
			(void)(ptr[4]);				\
			(void)(ptr[5]);				\
			(void)(ptr[6]);				\
			(void)(ptr[7]);				\
		}						\
		t2 = stress_time_now();				\
		total_dur += dur;				\
		dur_remainder = total_dur - (t2 - t1);		\
								\
		if (dur_remainder >= 0.0) {			\
			struct timespec t;			\
			time_t sec = (time_t)dur_remainder;	\
								\
			t.tv_sec = sec;				\
			t.tv_nsec = (long)((dur_remainder -	\
				(double)sec) *			\
				STRESS_NANOSECOND);		\
			(void)nanosleep(&t, NULL);		\
		}						\
	}							\
	*valid = true;						\
	return ((uintptr_t)ptr - (uintptr_t)start) / KB;	\
}

/* instantiate read exercisers for 128 (if available) down to 8 bit loads */
#if defined(HAVE_INT128_T)
STRESS_MEMRATE_READ(128, __uint128_t)
#endif
STRESS_MEMRATE_READ(64, uint64_t)
STRESS_MEMRATE_READ(32, uint32_t)
STRESS_MEMRATE_READ(16, uint16_t)
STRESS_MEMRATE_READ(8, uint8_t)
143
/*
 *  STRESS_MEMRATE_WRITE(size, type)
 *	generate a rate-limited write exerciser stress_memrate_write<size>
 *	that stores into the buffer [start, end) using stores of 'type'.
 *
 *	Mirrors STRESS_MEMRATE_READ: 8 stores per inner iteration cover
 *	'size' bytes, 'i' counts bytes per ~1 MB chunk, and nanosleep()
 *	throttles to wr_mbs MB/sec.  The stored value is the low 8 bits of
 *	the byte counter — the data pattern is arbitrary, only the store
 *	traffic matters.  Returns KBytes written.
 */
#define STRESS_MEMRATE_WRITE(size, type)			\
static uint64_t stress_memrate_write##size(			\
	void *start,						\
	void *end,						\
	uint64_t rd_mbs,					\
	uint64_t wr_mbs,					\
	bool *valid)						\
{								\
	register volatile type *ptr;				\
	double t1;						\
	const double dur = 1.0 / (double)wr_mbs;		\
	double total_dur = 0.0;					\
								\
	(void)rd_mbs;						\
								\
	t1 = stress_time_now();					\
	for (ptr = start; ptr < (type *)end;) {			\
		double t2, dur_remainder;			\
		uint32_t i;					\
								\
		if (!keep_stressing_flag())			\
			break;					\
		for (i = 0; (i < (uint32_t)MB) &&		\
		     (ptr < (type *)end);			\
		     ptr += 8, i += size) {			\
			ptr[0] = (uint8_t)i;			\
			ptr[1] = (uint8_t)i;			\
			ptr[2] = (uint8_t)i;			\
			ptr[3] = (uint8_t)i;			\
			ptr[4] = (uint8_t)i;			\
			ptr[5] = (uint8_t)i;			\
			ptr[6] = (uint8_t)i;			\
			ptr[7] = (uint8_t)i;			\
		}						\
		t2 = stress_time_now();				\
		total_dur += dur;				\
		dur_remainder = total_dur - (t2 - t1);		\
								\
		if (dur_remainder >= 0.0) {			\
			struct timespec t;			\
			time_t sec = (time_t)dur_remainder;	\
								\
			t.tv_sec = sec;				\
			t.tv_nsec = (long)((dur_remainder -	\
				(double)sec) *			\
				STRESS_NANOSECOND);		\
			(void)nanosleep(&t, NULL);		\
		}						\
	}							\
	*valid = true;						\
	return ((uintptr_t)ptr - (uintptr_t)start) / KB;	\
}
196
/*
 *
 * See https://akkadia.org/drepper/cpumemory.pdf - section 6.1
 * non-temporal writes using movntdq. Data is not going to be
 * read, so no need to cache. Write directly to memory.
 */

/*
 *  STRESS_MEMRATE_WRITE_NT(size, type, movtype, op, init)
 *	generate a rate-limited non-temporal (cache-bypassing) write
 *	exerciser stress_memrate_write_nt<size>.  'op' is the store
 *	intrinsic (e.g. __builtin_ia32_movntdq), 'movtype' its operand
 *	type and 'init' the initializer for the stored value.  Loop
 *	structure and throttling are identical to STRESS_MEMRATE_WRITE.
 *
 *	NOTE(review): the runtime guard bails out only when NEITHER SSE
 *	nor SSE2 is reported; movntdq strictly needs SSE2, so a
 *	hypothetical SSE-only CPU would fall through — in practice SSE2
 *	is baseline on x86-64, but confirm the '&&' is intended.
 *	__builtin_cpu_supports() is an x86-specific builtin, gated by
 *	HAVE_BUILTIN_SUPPORTS at the instantiation sites below.
 */
#define STRESS_MEMRATE_WRITE_NT(size, type, movtype, op, init)	\
static uint64_t stress_memrate_write_nt##size(			\
	void *start,						\
	void *end,						\
	uint64_t rd_mbs,					\
	uint64_t wr_mbs,					\
	bool *valid)						\
{								\
	register type *ptr;					\
	double t1;						\
	const double dur = 1.0 / (double)wr_mbs;		\
	double total_dur = 0.0;					\
								\
	(void)rd_mbs;						\
								\
	if (!__builtin_cpu_supports("sse") &&			\
	    !__builtin_cpu_supports("sse2")) {			\
		*valid = false;					\
		return 0;					\
	}							\
								\
	t1 = stress_time_now();					\
	for (ptr = start; ptr < (type *)end;) {			\
		double t2, dur_remainder;			\
		uint32_t i;					\
								\
		if (!keep_stressing_flag())			\
			break;					\
		for (i = 0; (i < (uint32_t)MB) &&		\
		     (ptr < (type *)end);			\
		     ptr += 8, i += size) {			\
			movtype v = (movtype)init;		\
			movtype *vptr = (movtype *)ptr;		\
								\
			op(&vptr[0], v);			\
			op(&vptr[1], v);			\
			op(&vptr[2], v);			\
			op(&vptr[3], v);			\
			op(&vptr[4], v);			\
			op(&vptr[5], v);			\
			op(&vptr[6], v);			\
			op(&vptr[7], v);			\
		}						\
		t2 = stress_time_now();				\
		total_dur += dur;				\
		dur_remainder = total_dur - (t2 - t1);		\
								\
		if (dur_remainder >= 0.0) {			\
			struct timespec t;			\
			time_t sec = (time_t)dur_remainder;	\
								\
			t.tv_sec = sec;				\
			t.tv_nsec = (long)((dur_remainder -	\
				(double)sec) *			\
				STRESS_NANOSECOND);		\
			(void)nanosleep(&t, NULL);		\
		}						\
	}							\
	*valid = true;						\
	return ((uintptr_t)ptr - (uintptr_t)start) / KB;	\
}

/* clang's builtin takes (value, addr); swap args to match op(addr, value) */
#define __BUILTIN_NONTEMPORAL_STORE(a, b) __builtin_nontemporal_store(b, a)

#if defined(HAVE_INT128_T) &&			\
    defined(HAVE_BUILTIN_SUPPORTS) &&		\
    defined(HAVE_BUILTIN_NONTEMPORAL_STORE)
/* Clang non-temporal stores */
STRESS_MEMRATE_WRITE_NT(128, __uint128_t, __uint128_t, __BUILTIN_NONTEMPORAL_STORE, i)
#define HAVE_WRITE128NT
#elif defined(HAVE_XMMINTRIN_H) &&		\
      defined(HAVE_INT128_T) &&			\
      defined(HAVE_V2DI) &&			\
      defined(HAVE_BUILTIN_SUPPORTS) &&		\
      defined(HAVE_BUILTIN_IA32_MOVNTDQ)
/* gcc x86 non-temporal stores */
STRESS_MEMRATE_WRITE_NT(128, __uint128_t, __v2di, __builtin_ia32_movntdq, SINGLE_ARG({ 0, i }))
#define HAVE_WRITE128NT
#endif

#if defined(HAVE_BUILTIN_SUPPORTS) &&		\
    defined(HAVE_BUILTIN_NONTEMPORAL_STORE)
/* Clang non-temporal stores */
STRESS_MEMRATE_WRITE_NT(64, uint64_t, uint64_t, __BUILTIN_NONTEMPORAL_STORE, i)
#define HAVE_WRITE64NT
#elif defined(HAVE_XMMINTRIN_H) &&		\
      defined(HAVE_BUILTIN_SUPPORTS) &&		\
      defined(HAVE_BUILTIN_IA32_MOVNTI64)
STRESS_MEMRATE_WRITE_NT(64, uint64_t, long long int, __builtin_ia32_movnti64, i)
#define HAVE_WRITE64NT
#endif

#if defined(HAVE_BUILTIN_SUPPORTS) &&		\
    defined(HAVE_BUILTIN_NONTEMPORAL_STORE)
/* Clang non-temporal stores */
STRESS_MEMRATE_WRITE_NT(32, uint32_t, uint32_t, __BUILTIN_NONTEMPORAL_STORE, i)
#define HAVE_WRITE32NT
#elif defined(HAVE_XMMINTRIN_H) &&		\
      defined(HAVE_BUILTIN_SUPPORTS) &&		\
      defined(HAVE_BUILTIN_IA32_MOVNTI)
STRESS_MEMRATE_WRITE_NT(32, uint32_t, int, __builtin_ia32_movnti, i)
#define HAVE_WRITE32NT
#endif

/* instantiate cached write exercisers, 128 (if available) down to 8 bit */
#if defined(HAVE_INT128_T)
STRESS_MEMRATE_WRITE(128, __uint128_t)
#endif
STRESS_MEMRATE_WRITE(64, uint64_t)
STRESS_MEMRATE_WRITE(32, uint32_t)
STRESS_MEMRATE_WRITE(16, uint16_t)
STRESS_MEMRATE_WRITE(8, uint8_t)
315
/*
 *  table of all available exercisers; indices must stay stable as they
 *  index the shared stats array and the misc stats slots
 */
static stress_memrate_info_t memrate_info[] = {
#if defined(HAVE_WRITE128NT)
	{ "write128nt",	stress_memrate_write_nt128 },
#endif
#if defined(HAVE_WRITE64NT)
	{ "write64nt",	stress_memrate_write_nt64 },
#endif
#if defined(HAVE_WRITE32NT)
	{ "write32nt",	stress_memrate_write_nt32 },
#endif

#if defined(HAVE_INT128_T)
	{ "write128",	stress_memrate_write128 },
#endif
	{ "write64",	stress_memrate_write64 },
	{ "write32",	stress_memrate_write32 },
	{ "write16",	stress_memrate_write16 },
	{ "write8",	stress_memrate_write8 },

#if defined(HAVE_INT128_T)
	{ "read128",	stress_memrate_read128 },
#endif
	{ "read64",	stress_memrate_read64 },
	{ "read32",	stress_memrate_read32 },
	{ "read16",	stress_memrate_read16 },
	{ "read8",	stress_memrate_read8 }
};

/* number of exercisers compiled in on this build */
static const size_t memrate_items = SIZEOF_ARRAY(memrate_info);
345
stress_memrate_init_data(void * start,void * end)346 static void OPTIMIZE3 stress_memrate_init_data(
347 void *start,
348 void *end)
349 {
350 register volatile uint32_t *ptr;
351
352 for (ptr = start; ptr < (uint32_t *)end; ptr++)
353 *ptr = stress_mwc32();
354 }
355
/*
 *  stress_memrate_mmap()
 *	mmap an anonymous read/write buffer of sz bytes for exercising;
 *	returns MAP_FAILED (with an error message) on failure
 */
static inline void *stress_memrate_mmap(const stress_args_t *args, uint64_t sz)
{
	void *ptr;

	ptr = mmap(NULL, (size_t)sz, PROT_READ | PROT_WRITE,
#if defined(MAP_POPULATE)
		MAP_POPULATE |	/* pre-fault pages so timing isn't skewed by demand paging */
#endif
#if defined(HAVE_MADVISE)
		MAP_PRIVATE |
#else
		MAP_SHARED |
#endif
		MAP_ANONYMOUS, -1, 0);
	/* Coverity Scan believes NULL can be returned, doh */
	if (!ptr || (ptr == MAP_FAILED)) {
		pr_err("%s: cannot allocate %" PRIu64 " bytes\n",
			args->name, sz);
		ptr = MAP_FAILED;
	} else {
#if defined(HAVE_MADVISE) &&	\
    defined(MADV_HUGEPAGE)
		/*
		 * NOTE(review): guarded by MADV_HUGEPAGE yet advises
		 * MADV_NORMAL — looks like a remnant of a huge-page
		 * option; confirm the intended advice.  Result is
		 * deliberately ignored, advice is best-effort.
		 */
		int ret, advice = MADV_NORMAL;

		ret = madvise(ptr, sz, advice);
		(void)ret;
#endif
	}
	return ptr;
}
386
stress_memrate_child(const stress_args_t * args,void * ctxt)387 static int stress_memrate_child(const stress_args_t *args, void *ctxt)
388 {
389 const stress_memrate_context_t *context = (stress_memrate_context_t *)ctxt;
390 void *buffer, *buffer_end;
391
392 buffer = stress_memrate_mmap(args, context->memrate_bytes);
393 if (buffer == MAP_FAILED)
394 return EXIT_NO_RESOURCE;
395
396 buffer_end = (uint8_t *)buffer + context->memrate_bytes;
397 stress_memrate_init_data(buffer, buffer_end);
398
399 do {
400 size_t i;
401
402 for (i = 0; keep_stressing(args) && (i < memrate_items); i++) {
403 double t1, t2;
404 uint64_t kbytes;
405 stress_memrate_info_t *info = &memrate_info[i];
406 bool valid = false;
407
408 t1 = stress_time_now();
409 kbytes = info->func(buffer, buffer_end,
410 context->memrate_rd_mbs,
411 context->memrate_wr_mbs, &valid);
412 context->stats[i].kbytes += (double)kbytes;
413 t2 = stress_time_now();
414 context->stats[i].duration += (t2 - t1);
415 context->stats[i].valid = valid;
416
417 if (!keep_stressing(args))
418 break;
419 }
420
421 inc_counter(args);
422 } while (keep_stressing(args));
423
424 (void)munmap(buffer, context->memrate_bytes);
425 return EXIT_SUCCESS;
426 }
427
/*
 *  stress_memrate()
 *	stress cache/memory/CPU with memrate stressors; parent side:
 *	fetches settings, sets up shared stats, forks the oomable child
 *	and reports per-exerciser MB/sec rates afterwards
 */
static int stress_memrate(const stress_args_t *args)
{
	int rc;
	size_t i, stats_size;
	bool lock = false;
	stress_memrate_context_t context;

	/* defaults: ~0ULL rate caps effectively mean "unlimited" */
	context.memrate_bytes = DEFAULT_MEMRATE_BYTES;
	context.memrate_rd_mbs = ~0ULL;
	context.memrate_wr_mbs = ~0ULL;

	(void)stress_get_setting("memrate-bytes", &context.memrate_bytes);
	(void)stress_get_setting("memrate-rd-mbs", &context.memrate_rd_mbs);
	(void)stress_get_setting("memrate-wr-mbs", &context.memrate_wr_mbs);

	/* round the stats region up to a whole number of pages */
	stats_size = memrate_items * sizeof(*context.stats);
	stats_size = (stats_size + args->page_size - 1) & ~(args->page_size - 1);

	/* MAP_SHARED so the child's stat updates are visible to the parent */
	context.stats = (stress_memrate_stats_t *)mmap(NULL, stats_size,
		PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (context.stats == MAP_FAILED)
		return EXIT_NO_RESOURCE;
	for (i = 0; i < memrate_items; i++) {
		context.stats[i].duration = 0.0;
		context.stats[i].kbytes = 0.0;
		context.stats[i].valid = false;
	}

	/* round buffer size up to a 64 byte multiple so the widest
	   (128 bit x 8) inner loop never overruns the buffer */
	context.memrate_bytes = (context.memrate_bytes + 63) & ~(63ULL);

	stress_set_proc_state(args->name, STRESS_STATE_RUN);

	rc = stress_oomable_child(args, &context, stress_memrate_child, STRESS_OOMABLE_NORMAL);

	stress_set_proc_state(args->name, STRESS_STATE_DEINIT);

	/* emit all rates under one lock so output lines stay grouped */
	pr_lock(&lock);
	for (i = 0; i < memrate_items; i++) {
		if (!context.stats[i].valid)
			continue;
		if (context.stats[i].duration > 0.001) {
			char tmp[32];
			const double rate = context.stats[i].kbytes / (context.stats[i].duration * KB);

			pr_inf_lock(&lock, "%s: %10.10s: %12.2f MB/sec\n",
				args->name, memrate_info[i].name, rate);

			(void)snprintf(tmp, sizeof(tmp), "%s MB/sec", memrate_info[i].name);
			stress_misc_stats_set(args->misc_stats, i, tmp, rate);
		} else {
			pr_inf_lock(&lock, "%s: %10.10s: interrupted early\n",
				args->name, memrate_info[i].name);
		}
	}
	pr_unlock(&lock);

	(void)munmap((void *)context.stats, stats_size);

	return rc;
}
492
/* option parsing handlers for the memrate command line options */
static const stress_opt_set_func_t opt_set_funcs[] = {
	{ OPT_memrate_bytes,	stress_set_memrate_bytes },
	{ OPT_memrate_rd_mbs,	stress_set_memrate_rd_mbs },
	{ OPT_memrate_wr_mbs,	stress_set_memrate_wr_mbs },
	{ 0,			NULL }
};

/* stressor registration: entry point, class, options and help */
stressor_info_t stress_memrate_info = {
	.stressor = stress_memrate,
	.class = CLASS_MEMORY,
	.opt_set_funcs = opt_set_funcs,
	.help = help
};
506