1 /*
2 * Copyright (C) 2013-2021 Canonical, Ltd.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * This code is a complete clean re-write of the stress tool by
19 * Colin Ian King <colin.king@canonical.com> and attempts to be
20 * backwardly compatible with the stress tool by Amos Waterland
21 * <apw@rossby.metr.ou.edu> but has more stress tests and more
22 * functionality.
23 *
24 */
25 #include "stress-ng.h"
26
/*
 *  Mathematical constants, written as long double literals.
 *  NOTE(review): GAMMA, OMEGA and PSI look like the Euler-Mascheroni
 *  constant, the omega constant and the reciprocal Fibonacci constant
 *  respectively - confirm against their users.
 */
#define GAMMA (0.57721566490153286060651209008240243104215933593992L)
#define OMEGA (0.56714329040978387299996866221035554975381578718651L)
#define PSI (3.35988566624317755317201130291892717968890513373197L)
#define PI (3.14159265358979323846264338327950288419716939937511L)

/*
 *  Problem sizes for individual cpu methods. FFT_SIZE is the number
 *  of elements transformed by stress_cpu_fft() and the DITHER sizes
 *  are the dimensions of the global pixels[][] array; the remaining
 *  sizes are presumably consumed by methods defined further down
 *  the file.
 */
#define STATS_MAX (250)
#define FFT_SIZE (4096)
#define STRESS_CPU_DITHER_X (1024)
#define STRESS_CPU_DITHER_Y (768)
#define MATRIX_PROD_SIZE (128)
#define CORRELATE_DATA_LEN (8192)
#define CORRELATE_LEN (CORRELATE_DATA_LEN / 16)
#define SIEVE_SIZE (104730)
40
/*
 *  Shims for math functions that some math libraries do not
 *  implement (yet). Every shim selects, in order of preference:
 *
 *    1. the compiler builtin,
 *    2. the math library function of the same precision,
 *    3. a lower precision stand-in.
 */

/* magnitude of a long double complex value */
#if defined(HAVE_BUILTIN_CABSL)
#define shim_cabsl(x)		__builtin_cabsl(x)
#elif defined(HAVE_CABSL)
#define shim_cabsl(x)		cabsl(x)
#else
#define shim_cabsl(x)		cabs(x)
#endif

/* long double log-gamma */
#if defined(HAVE_BUILTIN_LGAMMAL)
#define shim_lgammal(x)		__builtin_lgammal(x)
#elif defined(HAVE_LGAMMAL)
#define shim_lgammal(x)		lgammal(x)
#else
#define shim_lgammal(x)		lgamma(x)
#endif

/* complex power, the last resort is the real pow() */
#if defined(HAVE_BUILTIN_CPOW)
#define shim_cpow(x, z)		__builtin_cpow(x, z)
#elif defined(HAVE_CPOW)
#define shim_cpow(x, z)		cpow(x, z)
#else
#define shim_cpow(x, z)		pow(x, z)
#endif

/* long double power */
#if defined(HAVE_BUILTIN_POWL)
#define shim_powl(x, y)		__builtin_powl(x, y)
#elif defined(HAVE_POWL)
#define shim_powl(x, y)		powl(x, y)
#else
#define shim_powl(x, y)		pow(x, y)
#endif

/* long double round to integer, the last resort is shim_rint() */
#if defined(HAVE_BUILTIN_RINTL)
#define shim_rintl(x)		__builtin_rintl(x)
#elif defined(HAVE_RINTL)
#define shim_rintl(x)		rintl(x)
#else
#define shim_rintl(x)		shim_rint(x)
#endif
97
/* double natural logarithm: builtin if available, else libm */
#ifdef HAVE_BUILTIN_LOG
#define shim_log(x)	__builtin_log(x)
#else
#define shim_log(x)	log(x)
#endif

/* long double natural logarithm, the last resort is shim_log() */
#if defined(HAVE_BUILTIN_LOGL)
#define shim_logl(x)	__builtin_logl(x)
#elif defined(HAVE_LOGL)
#define shim_logl(x)	logl(x)
#else
#define shim_logl(x)	shim_log(x)
#endif

/* double exponential: builtin if available, else libm */
#ifdef HAVE_BUILTIN_EXP
#define shim_exp(x)	__builtin_exp(x)
#else
#define shim_exp(x)	exp(x)
#endif

/*
 *  long double exponential; expl() is never used on Haiku,
 *  there (and when expl() is missing) fall back to shim_exp()
 */
#if defined(HAVE_BUILTIN_EXPL)
#define shim_expl(x)	__builtin_expl(x)
#elif defined(HAVE_EXPL) && !defined(__HAIKU__)
#define shim_expl(x)	expl(x)
#else
#define shim_expl(x)	shim_exp(x)
#endif
129
/*
 *  Cosine shims, real and complex, in float, double and long
 *  double precision. Each one prefers the compiler builtin, then
 *  the math library function, then a lower precision stand-in.
 */

/* float cosine */
#if defined(HAVE_BUILTIN_COSF)
#define shim_cosf(x)	__builtin_cosf(x)
#else
#define shim_cosf(x)	cosf(x)
#endif

/* double cosine */
#if defined(HAVE_BUILTIN_COS)
#define shim_cos(x)	__builtin_cos(x)
#else
#define shim_cos(x)	cos(x)
#endif

/* long double cosine, the last resort is the double variant */
#if defined(HAVE_BUILTIN_COSL)
#define shim_cosl(x)	__builtin_cosl(x)
#elif defined(HAVE_COSL)
#define shim_cosl(x)	cosl(x)
#else
#define shim_cosl(x)	((long double)shim_cos((double)(x)))
#endif

/* long double hyperbolic cosine, the last resort is cosh() */
#if defined(HAVE_BUILTIN_COSHL)
#define shim_coshl(x)	__builtin_coshl(x)
#elif defined(HAVE_COSHL)
#define shim_coshl(x)	coshl(x)
#else
#define shim_coshl(x)	((long double)cosh((double)(x)))
#endif

/* double complex cosine, the last resort is the real shim_cos() */
#if defined(HAVE_BUILTIN_CCOS)
#define shim_ccos(x)	__builtin_ccos(x)
#elif defined(HAVE_CCOS)
#define shim_ccos(x)	ccos(x)
#else
#define shim_ccos(x)	shim_cos(x)
#endif

/* float complex cosine, the last resort is shim_ccos() */
#if defined(HAVE_BUILTIN_CCOSF)
#define shim_ccosf(x)	__builtin_ccosf(x)
#elif defined(HAVE_CCOSF)
#define shim_ccosf(x)	ccosf(x)
#else
#define shim_ccosf(x)	shim_ccos(x)
#endif

/*
 *  long double complex cosine, the last resort is shim_ccos().
 *  The fallback expansion is fully parenthesised; it previously
 *  lacked the closing parenthesis and so could not compile when
 *  it was selected.
 */
#if defined(HAVE_BUILTIN_CCOSL)
#define shim_ccosl(x)	__builtin_ccosl(x)
#elif defined(HAVE_CCOSL)
#define shim_ccosl(x)	ccosl(x)
#else
#define shim_ccosl(x)	((long double complex)shim_ccos((double complex)(x)))
#endif
191
/*
 *  Sine shims, real and complex, in float, double and long
 *  double precision. Each one prefers the compiler builtin, then
 *  the math library function, then a lower precision stand-in.
 */

/*
 *  float sine; uses the float builtin __builtin_sinf() so that
 *  the float code path really is exercised in single precision
 *  (it previously expanded to the double __builtin_sin())
 */
#if defined(HAVE_BUILTIN_SINF)
#define shim_sinf(x)	__builtin_sinf(x)
#else
#define shim_sinf(x)	sinf(x)
#endif

/* double sine */
#if defined(HAVE_BUILTIN_SIN)
#define shim_sin(x)	__builtin_sin(x)
#else
#define shim_sin(x)	sin(x)
#endif

/* long double sine, the last resort is the double variant */
#if defined(HAVE_BUILTIN_SINL)
#define shim_sinl(x)	__builtin_sinl(x)
#elif defined(HAVE_SINL)
#define shim_sinl(x)	sinl(x)
#else
#define shim_sinl(x)	((long double)shim_sin((double)(x)))
#endif

/* long double hyperbolic sine, the last resort is sinh() */
#if defined(HAVE_BUILTIN_SINHL)
#define shim_sinhl(x)	__builtin_sinhl(x)
#elif defined(HAVE_SINHL)
#define shim_sinhl(x)	sinhl(x)
#else
#define shim_sinhl(x)	((long double)sinh((double)(x)))
#endif

/* double complex sine, the last resort is the real shim_sin() */
#if defined(HAVE_BUILTIN_CSIN)
#define shim_csin(x)	__builtin_csin(x)
#elif defined(HAVE_CSIN)
#define shim_csin(x)	csin(x)
#else
#define shim_csin(x)	shim_sin(x)
#endif

/* float complex sine, the last resort is shim_csin() */
#if defined(HAVE_BUILTIN_CSINF)
#define shim_csinf(x)	__builtin_csinf(x)
#elif defined(HAVE_CSINF)
#define shim_csinf(x)	csinf(x)
#else
#define shim_csinf(x)	shim_csin(x)
#endif

/*
 *  long double complex sine, the last resort is shim_csin().
 *  The fallback is wrapped in parentheses so that it expands to
 *  a single primary expression, like the cosine counterpart.
 */
#if defined(HAVE_BUILTIN_CSINL)
#define shim_csinl(x)	__builtin_csinl(x)
#elif defined(HAVE_CSINL)
#define shim_csinl(x)	csinl(x)
#else
#define shim_csinl(x)	((long double complex)shim_csin((double complex)(x)))
#endif
253
/* double square root: builtin if available, else libm */
#ifdef HAVE_BUILTIN_SQRT
#define shim_sqrt(x)	__builtin_sqrt(x)
#else
#define shim_sqrt(x)	sqrt(x)
#endif

/* long double square root, the last resort is shim_sqrt() */
#if defined(HAVE_BUILTIN_SQRTL)
#define shim_sqrtl(x)	__builtin_sqrtl(x)
#elif defined(HAVE_SQRTL)
#define shim_sqrtl(x)	sqrtl(x)
#else
#define shim_sqrtl(x)	shim_sqrt(x)
#endif

/* double absolute value: builtin if available, else libm */
#ifdef HAVE_BUILTIN_FABS
#define shim_fabs(x)	__builtin_fabs(x)
#else
#define shim_fabs(x)	fabs(x)
#endif

/* long double absolute value: builtin if available, else libm */
#ifdef HAVE_BUILTIN_FABSL
#define shim_fabsl(x)	__builtin_fabsl(x)
#else
#define shim_fabsl(x)	fabsl(x)
#endif

/* double round to integer: builtin if available, else libm */
#ifdef HAVE_BUILTIN_RINT
#define shim_rint(x)	__builtin_rint(x)
#else
#define shim_rint(x)	rint(x)
#endif
287
288
/*
 * the CPU stress test has different classes of cpu stressor
 */
/*
 * Signature shared by the cpu method functions in this file: each
 * takes the stressor name, which the methods use to prefix their
 * failure messages.
 */
typedef void (*stress_cpu_func)(const char *name);

/*
 * Associates the name of a cpu method with the function that
 * implements it.
 */
typedef struct {
	const char *name; /* human readable form of stressor */
	const stress_cpu_func func; /* the cpu method function */
} stress_cpu_method_info_t;
298
/*
 * Help text for the cpu stressor options. Each entry appears to be
 * { short option, long option, description } (stress_help_t is
 * declared elsewhere - confirm); the all-NULL entry ends the table.
 */
static const stress_help_t help[] = {
	{ "c N", "cpu N", "start N workers that perform CPU only loading" },
	{ NULL, "cpu-ops N", "stop after N cpu bogo operations" },
	{ "l P", "cpu-load P", "load CPU by P %, 0=sleep, 100=full load (see -c)" },
	{ NULL, "cpu-load-slice S", "specify time slice during busy load" },
	{ NULL, "cpu-method M", "specify stress cpu method M, default is all" },
	{ NULL, NULL, NULL }
};
307
/*
 * Forward declaration of the cpu method table; the table itself is
 * defined later in the file.
 */
static const stress_cpu_method_info_t cpu_methods[];

/* Don't make this static to ensure dithering does not get optimised out */
uint8_t pixels[STRESS_CPU_DITHER_X][STRESS_CPU_DITHER_Y];
312
stress_set_cpu_load(const char * opt)313 static int stress_set_cpu_load(const char *opt) {
314 int32_t cpu_load;
315
316 cpu_load = stress_get_int32(opt);
317 stress_check_range("cpu-load", (uint64_t)cpu_load, 0, 100);
318 return stress_set_setting("cpu-load", TYPE_ID_INT32, &cpu_load);
319 }
320
321 /*
322 * stress_set_cpu_load_slice()
323 * < 0 - number of iterations per busy slice
324 * = 0 - random duration between 0..0.5 seconds
325 * > 0 - milliseconds per busy slice
326 */
stress_set_cpu_load_slice(const char * opt)327 static int stress_set_cpu_load_slice(const char *opt)
328 {
329 int32_t cpu_load_slice;
330
331 cpu_load_slice = stress_get_int32(opt);
332 if ((cpu_load_slice < -5000) || (cpu_load_slice > 5000)) {
333 (void)fprintf(stderr, "cpu-load-slice must in the range -5000 to 5000.\n");
334 _exit(EXIT_FAILURE);
335 }
336 return stress_set_setting("cpu-load-slice", TYPE_ID_INT32, &cpu_load_slice);
337 }
338
339 /*
340 * stress_cpu_sqrt()
341 * stress CPU on square roots
342 */
stress_cpu_sqrt(const char * name)343 static void HOT TARGET_CLONES stress_cpu_sqrt(const char *name)
344 {
345 int i;
346
347 for (i = 0; i < 16384; i++) {
348 uint64_t rnd = stress_mwc32();
349 double r_d = shim_sqrt((double)rnd) * shim_sqrt((double)rnd);
350 long double r_ld = shim_sqrtl((long double)rnd) * shim_sqrtl((long double)rnd);
351 register uint64_t tmp;
352
353 r_d = shim_rint(r_d);
354 tmp = (uint64_t)r_d;
355 if (UNLIKELY((g_opt_flags & OPT_FLAGS_VERIFY) && (tmp != rnd))) {
356 pr_fail("%s: sqrt error detected on "
357 "sqrt(%" PRIu64 ")\n", name, rnd);
358 if (!keep_stressing_flag())
359 break;
360 }
361
362 r_ld = shim_rintl(r_ld);
363 tmp = (uint64_t)r_ld;
364 if (UNLIKELY((g_opt_flags & OPT_FLAGS_VERIFY) && (tmp != rnd))) {
365 pr_fail("%s: sqrtf error detected on "
366 "sqrt(%" PRIu64 ")\n", name, rnd);
367 if (!keep_stressing_flag())
368 break;
369 }
370 }
371 }
372
/*
 *  stress_is_affinity_set()
 *	returns true if at least one online CPU is absent from the
 *	calling process's affinity mask. Returns false when every
 *	online CPU is in the mask, when the mask cannot be read, or
 *	when sched_getaffinity() is not available at build time.
 */
static bool stress_is_affinity_set(void)
{
#if !defined(HAVE_SCHED_GETAFFINITY)
	/* Don't know, so assume not */
	return false;
#else
	cpu_set_t allowed;
	const int n_online = (int)stress_get_processors_online();
	int cpu = 0;

	CPU_ZERO(&allowed);
	if (sched_getaffinity(0, sizeof(allowed), &allowed) < 0) {
		/* Can't tell, so assume not */
		return false;
	}

	/*
	 *  An online CPU that is missing from the mask means the
	 *  stressor has been pinned to some CPUs and not to others,
	 *  which can lead to load balancing difficulties
	 */
	while (cpu < n_online) {
		if (!CPU_ISSET(cpu, &allowed))
			return true;
		cpu++;
	}
	return false;
#endif
}
399
400 /*
401 * stress_cpu_loop()
402 * simple CPU busy loop
403 */
stress_cpu_loop(const char * name)404 static void OPTIMIZE0 stress_cpu_loop(const char *name)
405 {
406 uint32_t i, i_sum = 0;
407 const uint32_t sum = 134209536UL;
408
409 for (i = 0; i < 16384; i++) {
410 i_sum += i;
411 FORCE_DO_NOTHING();
412 }
413 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i_sum != sum))
414 pr_fail("%s: cpu loop 0..16383 sum was %" PRIu32 " and "
415 "did not match the expected value of %" PRIu32 "\n",
416 name, i_sum, sum);
417 }
418
419 /*
420 * stress_cpu_gcd()
421 * compute Greatest Common Divisor
422 */
stress_cpu_gcd(const char * name)423 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_gcd(const char *name)
424 {
425 uint32_t i, gcd_sum = 0;
426 const uint32_t gcd_checksum = 63000868UL;
427 uint64_t lcm_sum = 0;
428 const uint64_t lcm_checksum = 41637399273ULL;
429
430 for (i = 0; i < 16384; i++) {
431 register uint32_t a = i, b = i % (3 + (1997 ^ i));
432 register uint64_t lcm = ((uint64_t)a * b);
433
434 while (b != 0) {
435 register uint32_t r = b;
436 b = a % b;
437 a = r;
438 }
439 if (a)
440 lcm_sum += (lcm / a);
441 gcd_sum += a;
442 FORCE_DO_NOTHING();
443 }
444 if ((g_opt_flags & OPT_FLAGS_VERIFY) &&
445 (gcd_sum != gcd_checksum) &&
446 (lcm_sum != lcm_checksum))
447 pr_fail("%s: gcd error detected, failed modulo "
448 "or assignment operations\n", name);
449 }
450
/*
 * stress_cpu_bitops()
 * various bit manipulation hacks from bithacks
 * https://graphics.stanford.edu/~seander/bithacks.html
 *
 * For each i in 0..16383 four bit hacks are evaluated and their
 * results accumulated into i_sum; when verification is enabled
 * the accumulated value is compared against a fixed checksum.
 */
static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_bitops(const char *name)
{
	uint32_t i, i_sum = 0;
	const uint32_t sum = 0x8aac0aab;	/* expected value of i_sum */

	for (i = 0; i < 16384; i++) {
		{
			/* s counts the shifts still owed, starts at bit width - 1 */
			register uint32_t r, v, s = (sizeof(v) * 8) - 1;

			/* Reverse bits */
			r = v = i;
			for (v >>= 1; v; v >>= 1, s--) {
				r <<= 1;
				r |= v & 1;
			}
			/* shift in the zeros for the high bits v no longer had */
			r <<= s;
			i_sum += r;
		}
		{
			/* parity check */
			register uint32_t v = i;

			/* fold the word down to a 4 bit value ... */
			v ^= v >> 16;
			v ^= v >> 8;
			v ^= v >> 4;
			v &= 0xf;
			/* ... and use it to index the 16 entry lookup word 0x6996 */
			i_sum += (0x6996 >> v) & 1;
		}
		{
			/* Brian Kernighan count bits */
			register uint32_t j, v = i;

			/* each pass clears the lowest set bit, j counts the passes */
			for (j = 0; v; j++)
				v &= v - 1;
			i_sum += j;
		}
		{
			/*
			 * round up to nearest highest power of 2;
			 * note that only the bit smearing of i - 1 is
			 * performed here, there is no final increment,
			 * so the value added is all ones below and
			 * including the top set bit of i - 1
			 */
			register uint32_t v = i - 1;

			v |= v >> 1;
			v |= v >> 2;
			v |= v >> 4;
			v |= v >> 8;
			v |= v >> 16;
			i_sum += v;
		}
	}
	if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i_sum != sum))
		pr_fail("%s: bitops error detected, failed "
			"bitops operations\n", name);
}
508
509 /*
510 * stress_cpu_trig()
511 * simple sin, cos trig functions
512 */
stress_cpu_trig(const char * name)513 static void HOT stress_cpu_trig(const char *name)
514 {
515 int i;
516 long double d_sum = 0.0L;
517
518 (void)name;
519
520 for (i = 0; i < 1500; i++) {
521 long double theta = (2.0L * PI * (long double)i)/1500.0L;
522 {
523 double thetad = (double)theta;
524 float thetaf = (float)theta;
525
526 d_sum += (shim_cosl(theta) * shim_sinl(theta));
527 d_sum += ((long double)shim_cos(thetad) * (long double)shim_sin(thetad));
528 d_sum += ((long double)shim_cosf(thetaf) * (long double)shim_sinf(thetaf));
529 }
530 {
531 long double thetal = theta * 2.0L;
532 double thetad = (double)thetal;
533 float thetaf = (float)thetal;
534
535 d_sum += shim_cosl(thetal);
536 d_sum += (long double)shim_cos(thetad);
537 d_sum += (long double)shim_cosf(thetaf);
538 }
539 {
540 long double thetal = theta * 3.0L;
541 double thetad = (double)thetal;
542 float thetaf = (float)thetal;
543
544 d_sum += shim_sinl(thetal);
545 d_sum += (long double)shim_sin(thetad);
546 d_sum += (long double)shim_sinf(thetaf);
547 }
548 }
549 stress_long_double_put(d_sum);
550 }
551
552 /*
553 * stress_cpu_hyperbolic()
554 * simple hyperbolic sinh, cosh functions
555 */
stress_cpu_hyperbolic(const char * name)556 static void HOT stress_cpu_hyperbolic(const char *name)
557 {
558 int i;
559 long double d_sum = 0.0L;
560
561 (void)name;
562
563 for (i = 0; i < 1500; i++) {
564 long double theta = (2.0L * PI * (long double)i)/1500.0L;
565 {
566 double thetad = (double)theta;
567 float thetaf = (float)theta;
568
569 d_sum += (shim_coshl(theta) * shim_sinhl(theta));
570 d_sum += ((long double)cosh(thetad) * (long double)sinh(thetad));
571 d_sum += ((long double)coshf(thetaf) * (long double)sinhf(thetaf));
572 }
573 {
574 long double thetal = theta * 2.0L;
575 double thetad = (double)theta;
576 float thetaf = (float)theta;
577
578 d_sum += shim_coshl(thetal);
579 d_sum += (long double)cosh(thetad);
580 d_sum += (long double)coshf(thetaf);
581 }
582 {
583 long double thetal = theta * 3.0L;
584 double thetad = (double)theta;
585 float thetaf = (float)theta;
586
587 d_sum += shim_sinhl(thetal);
588 d_sum += (long double)sinh(thetad);
589 d_sum += (long double)sinhf(thetaf);
590 }
591 }
592 stress_long_double_put(d_sum);
593 }
594
595 /*
596 * stress_cpu_rand()
597 * generate lots of pseudo-random integers
598 */
stress_cpu_rand(const char * name)599 static void HOT OPTIMIZE3 stress_cpu_rand(const char *name)
600 {
601 int i;
602 uint32_t i_sum = 0;
603 const uint32_t sum = 0xc253698c;
604
605 STRESS_MWC_SEED();
606 for (i = 0; i < 16384; i++)
607 i_sum += stress_mwc32();
608
609 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i_sum != sum))
610 pr_fail("%s: rand error detected, failed sum of "
611 "pseudo-random values\n", name);
612 }
613
614 /*
615 * stress_cpu_rand48()
616 * generate random values using rand48 family of functions
617 */
stress_cpu_rand48(const char * name)618 static void HOT OPTIMIZE3 stress_cpu_rand48(const char *name)
619 {
620 int i;
621 double d = 0;
622 long int l = 0;
623
624 (void)name;
625
626 srand48(0x0defaced);
627 for (i = 0; i < 16384; i++) {
628 d += drand48();
629 l += lrand48();
630 }
631 stress_double_put(d);
632 stress_uint64_put((uint64_t)l);
633 }
634
635 /*
636 * stress_cpu_lfsr32()
637 * generate 16384 values from the Galois polynomial
638 * x^32 + x^31 + x^29 + x + 1
639 */
stress_cpu_lfsr32(const char * name)640 static void HOT OPTIMIZE3 stress_cpu_lfsr32(const char *name)
641 {
642 static uint32_t lfsr = 0xf63acb01;
643 register int i;
644
645 (void)name;
646
647 for (i = 0; i < 16384; i++) {
648 lfsr = (lfsr >> 1) ^ (unsigned int)(-(lfsr & 1u) & 0xd0000001U);
649 }
650 stress_uint32_put(lfsr);
651 }
652
653 /*
654 * stress_cpu_nsqrt()
655 * iterative Newton–Raphson square root
656 */
stress_cpu_nsqrt(const char * name)657 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_nsqrt(const char *name)
658 {
659 int i;
660 const long double precision = 1.0e-12L;
661 const int max_iter = 56;
662
663 for (i = 16300; i < 16384; i++) {
664 long double n = (long double)i;
665 long double lo = (n < 1.0L) ? n : 1.0L;
666 long double hi = (n < 1.0L) ? 1.0L : n;
667 long double rt;
668 int j = 0;
669
670 while ((j++ < max_iter) && ((hi - lo) > precision)) {
671 long double g = (lo + hi) / 2.0L;
672 if ((g * g) > n)
673 hi = g;
674 else
675 lo = g;
676 }
677 rt = (lo + hi) / 2.0L;
678
679 if (g_opt_flags & OPT_FLAGS_VERIFY) {
680 const long double r2 = shim_rintl(rt * rt);
681
682 if (j >= max_iter)
683 pr_fail("%s: Newton-Raphson sqrt "
684 "computation took more iterations "
685 "than expected\n", name);
686 if ((int)r2 != i)
687 pr_fail("%s: Newton-Raphson sqrt not "
688 "accurate enough\n", name);
689 }
690 }
691 }
692
693 /*
694 * stress_cpu_phi()
695 * compute the Golden Ratio
696 */
stress_cpu_phi(const char * name)697 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_phi(const char *name)
698 {
699 long double phi; /* Golden ratio */
700 const long double precision = 1.0e-15L;
701 const long double phi_ = (1.0L + shim_sqrtl(5.0L)) / 2.0L;
702 register uint64_t a, b;
703 const uint64_t mask = 1ULL << 63;
704 int i;
705
706 /* Pick any two starting points */
707 a = stress_mwc64() % 99;
708 b = stress_mwc64() % 99;
709
710 /* Iterate until we approach overflow */
711 for (i = 0; (i < 64) && !((a | b) & mask); i++) {
712 /* Find nth term */
713 register uint64_t c = a + b;
714
715 a = b;
716 b = c;
717 }
718 /* And we have the golden ratio */
719 phi = (long double)b / (long double)a;
720
721 if ((g_opt_flags & OPT_FLAGS_VERIFY) &&
722 (shim_fabsl(phi - phi_) > precision))
723 pr_fail("%s: Golden Ratio phi not accurate enough\n",
724 name);
725 }
726
727 /*
728 * stress_cpu_apery()
729 * compute Apéry's constant
730 */
stress_cpu_apery(const char * name)731 static void HOT OPTIMIZE3 stress_cpu_apery(const char *name)
732 {
733 uint32_t n;
734 long double a = 0.0L, a_ = a;
735 const long double precision = 1.0e-14L;
736
737 (void)name;
738
739 for (n = 1; n < 100000; n++) {
740 long double n3 = (long double)n;
741
742 a_ = a;
743 n3 = n3 * n3 * n3;
744 a += (1.0L / n3);
745 if (shim_fabsl(a - a_) < precision)
746 break;
747 }
748 if (shim_fabsl(a - a_) > precision)
749 pr_fail("%s: Apéry's const not accurate enough\n", name);
750 }
751
752
753 #if defined(HAVE_COMPLEX_H) && \
754 defined(HAVE_COMPLEX) && \
755 defined(__STDC_IEC_559_COMPLEX__) &&\
756 !defined(__UCLIBC__)
757 /*
758 * fft_partial()
759 * partial Fast Fourier Transform
760 */
fft_partial(double complex * data,double complex * tmp,const int n,const int m)761 static void HOT OPTIMIZE3 fft_partial(
762 double complex *data,
763 double complex *tmp,
764 const int n,
765 const int m)
766 {
767 if (m < n) {
768 const int m2 = m * 2;
769 int i;
770
771 fft_partial(tmp, data, n, m2);
772 fft_partial(tmp + m, data + m, n, m2);
773 for (i = 0; i < n; i += m2) {
774 const double complex negI = -(double complex)I;
775 double complex v = tmp[i];
776 double complex t =
777 cexp((negI * (double)PI * (double)i) /
778 (double)n) * tmp[i + m];
779 data[i / 2] = v + t;
780 data[(i + n) / 2] = v - t;
781 }
782 }
783 }
784
785 /*
786 * stress_cpu_fft()
787 * Fast Fourier Transform
788 */
stress_cpu_fft(const char * name)789 static void HOT TARGET_CLONES stress_cpu_fft(const char *name)
790 {
791 static double complex buf[FFT_SIZE], tmp[FFT_SIZE];
792 int i;
793
794 (void)name;
795
796 for (i = 0; i < FFT_SIZE; i++)
797 buf[i] = (double complex)(i % 63);
798
799 (void)memcpy(tmp, buf, sizeof(*tmp) * FFT_SIZE);
800 fft_partial(buf, tmp, FFT_SIZE, 1);
801 }
802 #endif
803
804 /*
805 * stress_cpu_euler()
806 * compute e using series
807 */
stress_cpu_euler(const char * name)808 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_euler(const char *name)
809 {
810 long double e = 1.0L, last_e;
811 long double fact = 1.0L;
812 long double precision = 1.0e-20L;
813 int n = 1;
814
815 do {
816 last_e = e;
817 fact *= n;
818 n++;
819 e += (1.0L / fact);
820 } while ((n < 25) && (shim_fabsl(e - last_e) > precision));
821
822 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (n >= 25))
823 pr_fail("%s: Euler computation took more iterations "
824 "than expected\n", name);
825 }
826
/*
 *  random_buffer()
 *	fill a uint8_t buffer full of random data, four bytes per
 *	stress_mwc32() value, least significant byte first.
 *	buffer *must* be multiple of 4 bytes in size; any
 *	remainder of len / 4 is left untouched.
 */
static void random_buffer(uint8_t *data, const size_t len)
{
	const size_t n_words = len / 4;
	size_t word;

	for (word = 0; word < n_words; word++) {
		const uint32_t rnd = stress_mwc32();
		uint8_t *out = data + (word * 4);

		out[0] = (uint8_t)rnd;
		out[1] = (uint8_t)(rnd >> 8);
		out[2] = (uint8_t)(rnd >> 16);
		out[3] = (uint8_t)(rnd >> 24);
	}
}
848
849 /*
850 * stress_cpu_collatz()
851 * stress test integer collatz conjecture
852 */
stress_cpu_collatz(const char * name)853 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_collatz(const char *name)
854 {
855 register uint64_t n = 989345275647ULL; /* Has 1348 steps in cycle */
856 register int i;
857
858 for (i = 0; n != 1; i++) {
859 n = (n & 1) ? (3 * n) + 1 : n / 2;
860 }
861 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i != 1348))
862 pr_fail("%s: error detected, failed collatz progression\n",
863 name);
864 }
865
866 /*
867 * stress_cpu_hash_generic()
868 * stress test generic string hash function
869 */
stress_cpu_hash_generic(const char * name,const char * hash_name,uint32_t (* hash_func)(const char * str),const uint32_t result)870 static void stress_cpu_hash_generic(
871 const char *name,
872 const char *hash_name,
873 uint32_t (*hash_func)(const char *str),
874 const uint32_t result)
875 {
876 char buffer[128];
877 size_t i;
878 uint32_t i_sum = 0;
879
880 STRESS_MWC_SEED();
881 random_buffer((uint8_t *)buffer, sizeof(buffer));
882 /* Make it ASCII range ' '..'_' */
883 for (i = 0; i < sizeof(buffer); i++)
884 buffer[i] = (buffer[i] & 0x3f) + ' ';
885
886 for (i = sizeof(buffer) - 1; i; i--) {
887 buffer[i] = '\0';
888 i_sum += hash_func(buffer);
889 }
890 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i_sum != result))
891 pr_fail("%s: %s error detected, failed hash %s sum\n",
892 name, hash_name, hash_name);
893 }
894
895 /*
896 * stress_cpu_jenkin()
897 * multiple iterations on jenkin hash
898 */
stress_cpu_jenkin(const char * name)899 static void stress_cpu_jenkin(const char *name)
900 {
901 uint8_t buffer[128];
902 size_t i;
903 uint32_t i_sum = 0;
904 const uint32_t sum = 0xc53302a5;
905
906 STRESS_MWC_SEED();
907 random_buffer(buffer, sizeof(buffer));
908
909 for (i = sizeof(buffer) - 1; i; i--) {
910 buffer[i] = '\0';
911 i_sum += stress_hash_jenkin(buffer, sizeof(buffer));
912 }
913
914 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i_sum != sum))
915 pr_fail("%s: jenkin error detected, failed hash jenkin sum\n",
916 name);
917 }
918
/*
 *  stress_cpu_little_endian()
 *	returns true if CPU is little endian, i.e. if the least
 *	significant byte of a 32 bit word is stored first
 */
static inline bool stress_cpu_little_endian(void)
{
	const union {
		uint32_t word;
		uint8_t bytes[sizeof(uint32_t)];
	} probe = { .word = 0x12345678 };

	return probe.bytes[0] == 0x78;
}
930
931 /*
932 * stress_cpu_murmur3_32
933 * multiple iterations on murmur3_32 hash, based on
934 * Austin Appleby's Murmur3 hash, code derived from
935 * https://en.wikipedia.org/wiki/MurmurHash
936 */
stress_cpu_murmur3_32(const char * name)937 static void stress_cpu_murmur3_32(const char *name)
938 {
939 uint8_t buffer[128];
940 size_t i;
941 uint32_t sum, i_sum = 0;
942 const uint32_t seed = 0xf12b35e1; /* arbitrary value */
943
944 STRESS_MWC_SEED();
945 random_buffer(buffer, sizeof(buffer));
946 for (i = sizeof(buffer) - 1; i; i--) {
947 buffer[i] = '\0';
948 i_sum += stress_hash_murmur3_32((uint8_t *)buffer, sizeof(buffer), seed);
949 }
950
951 /*
952 * Murmur produces different results depending on the Endianness
953 */
954 sum = stress_cpu_little_endian() ? 0xa53a4bb1 : 0x71eb83cc;
955
956 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (i_sum != sum))
957 pr_fail("%s: murmur3_32 error detected, failed hash murmur3_32 sum\n",
958 name);
959 }
960
961 /*
962 * stress_cpu_pjw()
963 * stress test hash pjw
964 */
stress_cpu_pjw(const char * name)965 static void stress_cpu_pjw(const char *name)
966 {
967 stress_cpu_hash_generic(name, "pjw", stress_hash_pjw, 0xa89a91c0);
968 }
969
970 /*
971 * stress_cpu_djb2a()
972 * stress test hash djb2a
973 */
stress_cpu_djb2a(const char * name)974 static void stress_cpu_djb2a(const char *name)
975 {
976 stress_cpu_hash_generic(name, "djb2a", stress_hash_djb2a, 0x6a60cb5a);
977 }
978
979 /*
980 * stress_cpu_fnv1a()
981 * stress test hash fnv1a
982 */
stress_cpu_fnv1a(const char * name)983 static void HOT stress_cpu_fnv1a(const char *name)
984 {
985 stress_cpu_hash_generic(name, "fnv1a", stress_hash_fnv1a, 0x8ef17e80);
986 }
987
988 /*
989 * stress_cpu_sdbm()
990 * stress test hash sdbm
991 */
stress_cpu_sdbm(const char * name)992 static void stress_cpu_sdbm(const char *name)
993 {
994 stress_cpu_hash_generic(name, "sdbm", stress_hash_sdbm, 0x46357819);
995 }
996
997 /*
998 * stress_cpu_nhash()
999 * stress test hash nhash
1000 */
stress_cpu_nhash(const char * name)1001 static void stress_cpu_nhash(const char *name)
1002 {
1003 stress_cpu_hash_generic(name, "nhash", stress_hash_nhash, 0x1cc86e3);
1004 }
1005
1006 /*
1007 * stress_cpu_idct()
1008 * compute 8x8 Inverse Discrete Cosine Transform
1009 */
stress_cpu_idct(const char * name)1010 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_idct(const char *name)
1011 {
1012 const double invsqrt2 = 1.0 / shim_sqrt(2.0);
1013 const double pi_over_16 = (double)PI / 16.0;
1014 const int sz = 8;
1015 int i, j, u, v;
1016 float data[sz][sz], idct[sz][sz];
1017
1018 /*
1019 * Set up DCT
1020 */
1021 for (i = 0; i < sz; i++) {
1022 for (j = 0; j < sz; j++) {
1023 data[i][j] = (i + j == 0) ? 2040: 0;
1024 }
1025 }
1026 for (i = 0; i < sz; i++) {
1027 const double pi_i = (i + i + 1) * pi_over_16;
1028
1029 for (j = 0; j < sz; j++) {
1030 const double pi_j = (j + j + 1) * pi_over_16;
1031 double sum = 0.0;
1032
1033 for (u = 0; u < sz; u++) {
1034 const double cos_pi_i_u = shim_cos(pi_i * u);
1035
1036 for (v = 0; v < sz; v++) {
1037 const double cos_pi_j_v =
1038 shim_cos(pi_j * v);
1039
1040 sum += ((double)data[u][v] *
1041 (u ? 1.0 : invsqrt2) *
1042 (v ? 1.0 : invsqrt2) *
1043 cos_pi_i_u * cos_pi_j_v);
1044 }
1045 }
1046 idct[i][j] = (float)(0.25 * sum);
1047 }
1048 }
1049 /* Final output should be a 8x8 matrix of values 255 */
1050 if (g_opt_flags & OPT_FLAGS_VERIFY) {
1051 for (i = 0; i < sz; i++) {
1052 for (j = 0; j < sz; j++) {
1053 if ((int)idct[i][j] != 255) {
1054 pr_fail("%s: IDCT error detected, "
1055 "IDCT[%d][%d] was %d, "
1056 "expecting 255\n",
1057 name, i, j, (int)idct[i][j]);
1058 }
1059 }
1060 if (!keep_stressing_flag())
1061 return;
1062 }
1063 }
1064 }
1065
/*
 *  int_ops()
 *	one round of mixed integer operations (add, xor, shifts,
 *	subtract, multiply, divide, or, and) on the variables a
 *	and b of type _type, using the constants c1, c2, c3 and
 *	four values from stress_mwc32().
 *
 *	Expands to a single do { } while (0) statement without a
 *	trailing semicolon, so the caller terminates it like any
 *	other statement.
 */
#define int_ops(_type, a, b, c1, c2, c3)	\
do {						\
	a += b;					\
	b ^= a;					\
	a >>= 1;				\
	b <<= 2;				\
	b -= a;					\
	a ^= (_type)~0;				\
	b ^= ~(c1);				\
	a *= 3;					\
	b *= 7;					\
	a += 2;					\
	b -= 3;					\
	a /= 77;				\
	b /= 3;					\
	a <<= 1;				\
	b <<= 2;				\
	a |= 1;					\
	b |= 3;					\
	a *= stress_mwc32();			\
	b ^= stress_mwc32();			\
	a += stress_mwc32();			\
	b -= stress_mwc32();			\
	a /= 7;					\
	b /= 9;					\
	a |= (c2);				\
	b &= (c3);				\
} while (0)

/* 64 bit constants fed to int_ops(), masked down to the type in use */
#define C1	(0xf0f0f0f0f0f0f0f0ULL)
#define C2	(0x1000100010001000ULL)
#define C3	(0xffeffffefebefffeULL)

/*
 *  Generic int stressor macro
 *
 *	Generates stress_cpu_int<_sz>(): re-seeds the mwc
 *	generator, runs 1000 rounds of int_ops() on two _type
 *	values and, when verification is enabled, compares the
 *	results with the expected final values _a and _b.
 *	_c1, _c2 and _c3 are the constants passed to int_ops().
 */
#define stress_cpu_int(_type, _sz, _a, _b, _c1, _c2, _c3)	\
static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_int ## _sz(const char *name)\
{								\
	const _type mask = (_type)~(_type)0;			\
	const _type a_final = (_a);				\
	const _type b_final = (_b);				\
	const _type c1 = (_c1) & mask;				\
	const _type c2 = (_c2) & mask;				\
	const _type c3 = (_c3) & mask;				\
	register _type a, b;					\
	int i;							\
								\
	STRESS_MWC_SEED();					\
	a = (_type)stress_mwc32();				\
	b = (_type)stress_mwc32();				\
								\
	for (i = 0; i < 1000; i++) {				\
		int_ops(_type, a, b, c1, c2, c3);		\
	}							\
								\
	if ((g_opt_flags & OPT_FLAGS_VERIFY) &&			\
	    ((a != a_final) || (b != b_final)))			\
		pr_fail("%s: int" # _sz " error detected, "	\
			"failed int" # _sz			\
			" math operations\n", name);		\
}

/* For compilers that support int128 .. */
#if defined(HAVE_INT128_T)

/*
 *  Build a 128 bit unsigned value from two 64 bit halves; the
 *  arguments are parenthesised so that expressions can be passed
 */
#define _UINT128(hi, lo)	((((__uint128_t)(hi)) << 64) | (__uint128_t)(lo))

/* 128 bit variant: stress_cpu_int128() */
stress_cpu_int(__uint128_t, 128,
	_UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
	_UINT128(0x62f086e6160e4e,0xd84c9f800365858),
	_UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3))
#endif
1139
1140 stress_cpu_int(uint64_t, 64, \
1141 0x013f7f6dc1d79197cULL, 0x01863d2c6969a51ceULL,
1142 C1, C2, C3)
1143
1144 stress_cpu_int(uint32_t, 32, \
1145 0x1ce9b547UL, 0xa24b33aUL,
1146 C1, C2, C3)
1147
1148 stress_cpu_int(uint16_t, 16, \
1149 0x1871, 0x07f0,
1150 C1, C2, C3)
1151
1152 stress_cpu_int(uint8_t, 8, \
1153 0x12, 0x1a,
1154 C1, C2, C3)
1155
/*
 * float_ops()
 * one round of mixed floating point operations (add, subtract,
 * multiply, divide and calls to the supplied _sin and _cos) on the
 * variables a, b, c and d of type _type. Every statement feeds the
 * ones after it, so the order of the statements matters. Expands
 * to a single statement; the caller supplies the semicolon.
 */
#define float_ops(_type, a, b, c, d, _sin, _cos) \
do { \
	a = a + b; \
	b = a * c; \
	c = a - b; \
	d = a / b; \
	a = c / (_type)0.1923L; \
	b = c + a; \
	c = b * (_type)3.12L; \
	d = d + b + (_type)_sin(a); \
	a = (b + c) / c; \
	b = b * c; \
	c = c + (_type)1.0L; \
	d = d - (_type)_sin(c); \
	a = a * (_type)_cos(b); \
	b = b + (_type)_cos(c); \
	c = (_type)_sin(a + b) / (_type)2.344L; \
	b = d - (_type)1.0L; \
} while (0)
1175
/*
 *  Generic floating point stressor macro
 *
 *	Generates stress_cpu_<_name>(): seeds two of the working
 *	values from stress_mwc32(), runs 1000 rounds of
 *	float_ops() in type _type using the _sin and _cos
 *	functions and hands the sum of the four working values to
 *	stress_double_put(). Nothing is verified.
 */
#define stress_cpu_fp(_type, _name, _sin, _cos)				\
static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_ ## _name(const char *name)\
{									\
	const uint32_t r1 = stress_mwc32();				\
	const uint32_t r2 = stress_mwc32();				\
	_type a = (_type)0.18728L;					\
	_type b = (_type)r1;						\
	_type c = (_type)r2;						\
	_type d = (_type)0.0;						\
	_type total;							\
	int pass = 0;							\
									\
	(void)name;							\
									\
	while (pass < 1000) {						\
		float_ops(_type, a, b, c, d,				\
			_sin, _cos);					\
		pass++;							\
	}								\
	total = a + b + c + d;						\
	stress_double_put((double)total);				\
}
1200
stress_cpu_fp(float,float,shim_sinf,shim_cosf)1201 stress_cpu_fp(float, float, shim_sinf, shim_cosf)
1202 stress_cpu_fp(double, double, shim_sin, shim_cos)
1203 stress_cpu_fp(long double, longdouble, shim_sinl, shim_cosl)
1204 #if defined(HAVE_FLOAT_DECIMAL32) && \
1205 !defined(__clang__)
1206 stress_cpu_fp(_Decimal32, decimal32, shim_sinf, shim_cosf)
1207 #endif
1208 #if defined(HAVE_FLOAT_DECIMAL64) && \
1209 !defined(__clang__)
1210 stress_cpu_fp(_Decimal64, decimal64, shim_sin, shim_cos)
1211 #endif
1212 #if defined(HAVE_FLOAT_DECIMAL128) && \
1213 !defined(__clang__)
1214 stress_cpu_fp(_Decimal128, decimal128, shim_sinl, shim_cosl)
1215 #endif
1216 #if defined(HAVE_FLOAT16) && \
1217 !defined(__clang__)
1218 stress_cpu_fp(__fp16, float16, shim_sin, shim_cos)
1219 #endif
1220 #if defined(HAVE_FLOAT32) && \
1221 !defined(__clang__)
1222 stress_cpu_fp(_Float32, float32, shim_sin, shim_cos)
1223 #endif
1224 #if defined(HAVE_FLOAT64) && \
1225 !defined(__clang__)
1226 stress_cpu_fp(_Float64, float64, shim_sin, shim_cos)
1227 #endif
1228 #if defined(HAVE_FLOAT80) && \
1229 !defined(__clang__)
1230 stress_cpu_fp(__float80, float80, shim_sinl, shim_cosl)
1231 #endif
1232 #if defined(HAVE_FLOAT128) && \
1233 !defined(__clang__)
1234 stress_cpu_fp(__float128, float128, shim_sinl, shim_cosl)
1235 #endif
1236
/* Paste a floating point literal suffix (f, l or nothing) onto a literal */
#define FP(val, ltype)	val ## ltype

#if defined(HAVE_COMPLEX_H) &&			\
    defined(HAVE_COMPLEX) &&			\
    defined(__STDC_IEC_559_COMPLEX__) &&	\
    !defined(__UCLIBC__)
/*
 * stress_cpu_complex()
 *	generator macro for the complex number stressors.  It defines
 *	stress_cpu_<_name>(), which builds four complex working values
 *	(three random numbers are drawn, in order, for the real part of
 *	b, the real part of c and the imaginary part of c), runs 1000
 *	rounds of float_ops() with the _csin/_ccos functions and passes
 *	the sum, converted to double, to stress_double_put().
 *	_ltype is the literal suffix matching _type.
 */
#define stress_cpu_complex(_type, _ltype, _name, _csin, _ccos)		\
static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_ ## _name(const char *name)\
{									\
	const uint32_t rnd_b = stress_mwc32();				\
	const uint32_t rnd_c = stress_mwc32();				\
	_type cI = (_type)I;						\
	_type a = FP(0.18728, _ltype) + cI * FP(0.2762, _ltype);	\
	_type b = (_type)rnd_b - cI * FP(0.11121, _ltype);		\
	_type c = (_type)rnd_c + cI * stress_mwc32();			\
	_type d = (_type)0.0;						\
	_type total;							\
	int loops;							\
									\
	(void)name;							\
									\
	for (loops = 0; loops < 1000; loops++) {			\
		float_ops(_type, a, b, c, d, _csin, _ccos);		\
	}								\
	total = a + b + c + d;						\
	stress_double_put((double)total);				\
}

stress_cpu_complex(complex float, f, complex_float, shim_csinf, shim_ccosf)
stress_cpu_complex(complex double, , complex_double, shim_csin, shim_ccos)
stress_cpu_complex(complex long double, l, complex_long_double, shim_csinl, shim_ccosl)
#endif
1275
/*
 * int_float_ops()
 *	one round of interleaved integer and floating point operations.
 *
 *	The floating point statements on flt_a..flt_d use the same
 *	sequence of operations as float_ops(); integer statements on
 *	int_a and int_b are placed between them.  Four values are drawn
 *	from stress_mwc32() per round and folded into int_a/int_b, and
 *	_c1, _c2 and _c3 are applied as xor (inverted), or and and masks
 *	respectively.
 *
 *	All flt_* and int_* arguments are read and assigned repeatedly,
 *	so they must be plain variables.  The statement order determines
 *	the final values and must not be changed.
 */
#define int_float_ops(_ftype, flt_a, flt_b, flt_c, flt_d,	\
	_sin, _cos, _inttype, int_a, int_b, _c1, _c2, _c3)	\
	do {							\
		int_a += int_b;					\
		int_b ^= int_a;					\
		flt_a = flt_a + flt_b;				\
		int_a >>= 1;					\
		int_b <<= 2;					\
		flt_b = flt_a * flt_c;				\
		int_b -= int_a;					\
		int_a ^= ~(_inttype)0;				\
		flt_c = flt_a - flt_b;				\
		int_b ^= ~(_c1);				\
		int_a *= 3;					\
		flt_d = flt_a / flt_b;				\
		int_b *= 7;					\
		int_a += 2;					\
		flt_a = flt_c / (_ftype)0.1923L;		\
		int_b -= 3;					\
		int_a /= 77;					\
		flt_b = flt_c + flt_a;				\
		int_b /= 3;					\
		int_a <<= 1;					\
		flt_c = flt_b * (_ftype)3.12L;			\
		int_b <<= 2;					\
		int_a |= 1;					\
		flt_d = flt_d + flt_b + (_ftype)_sin(flt_a);	\
		int_b |= 3;					\
		int_a *= stress_mwc32();			\
		flt_a = (flt_b + flt_c) / flt_c;		\
		int_b ^= stress_mwc32();			\
		int_a += stress_mwc32();			\
		flt_b = flt_b * flt_c;				\
		int_b -= stress_mwc32();			\
		int_a /= 7;					\
		flt_c = flt_c + (_ftype)1.0L;			\
		int_b /= 9;					\
		flt_d = flt_d - (_ftype)_sin(flt_c);		\
		int_a |= (_c2);					\
		flt_a = flt_a * (_ftype)_cos(flt_b);		\
		flt_b = flt_b + (_ftype)_cos(flt_c);		\
		int_b &= (_c3);					\
		flt_c = (_ftype)_sin(flt_a + flt_b) / (_ftype)2.344L;	\
		flt_b = flt_d - (_ftype)1.0L;			\
	} while (0)
1321
1322
/*
 * stress_cpu_int_fp()
 *	generic integer and floating point stressor macro.  It defines
 *	stress_cpu_int<_sz>_<_name>(), which runs 1000 rounds of
 *	int_float_ops() on _inttype integers and _ftype floats.
 *
 *	_a, _b		values int_a and int_b are expected to hold after
 *			the final round; compared only when
 *			OPT_FLAGS_VERIFY is set
 *	_c1.._c3	constants, truncated to _inttype, used as masks
 *	_sinf, _cosf	sine and cosine functions for _ftype
 *
 *	Two random values are drawn for the floating point seeds before
 *	STRESS_MWC_SEED() is invoked; the integer seeds are drawn after
 *	it.  The floating point sum is passed to stress_double_put().
 *
 *	The failure message separates the integer width from the float
 *	type name with a space (it previously ran them together, for
 *	example "int32float").
 */
#define stress_cpu_int_fp(_inttype, _sz, _ftype, _name, _a, _b,	\
	_c1, _c2, _c3, _sinf, _cosf)					\
static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_int ## _sz ## _ ## _name(const char *name)\
{									\
	int i;								\
	_inttype int_a, int_b;						\
	const _inttype mask = (_inttype)~0;				\
	const _inttype a_final = _a;					\
	const _inttype b_final = _b;					\
	const _inttype c1 = _c1 & mask;					\
	const _inttype c2 = _c2 & mask;					\
	const _inttype c3 = _c3 & mask;					\
	const uint32_t r1 = stress_mwc32(),				\
		       r2 = stress_mwc32();				\
	_ftype flt_a = (_ftype)0.18728L,				\
	       flt_b = (_ftype)r1,					\
	       flt_c = (_ftype)r2,					\
	       flt_d = (_ftype)0.0,					\
	       flt_r;							\
									\
	STRESS_MWC_SEED();						\
	int_a = stress_mwc32();						\
	int_b = stress_mwc32();						\
									\
	for (i = 0; i < 1000; i++) {					\
		int_float_ops(_ftype, flt_a, flt_b, flt_c,		\
			flt_d, _sinf, _cosf, _inttype,			\
			int_a, int_b, c1, c2, c3);			\
	}								\
	if ((g_opt_flags & OPT_FLAGS_VERIFY) &&				\
	    ((int_a != a_final) || (int_b != b_final)))			\
		pr_fail("%s: int" # _sz " error detected, "		\
			"failed int" # _sz " " # _ftype			\
			" math operations\n", name);			\
									\
	flt_r = flt_a + flt_b + flt_c + flt_d;				\
	stress_double_put((double)flt_r);				\
}
1364
1365 stress_cpu_int_fp(uint32_t, 32, float, float,
1366 0x1ce9b547UL, 0xa24b33aUL,
1367 C1, C2, C3, shim_sinf, shim_cosf)
1368 stress_cpu_int_fp(uint32_t, 32, double, double,
1369 0x1ce9b547UL, 0xa24b33aUL,
1370 C1, C2, C3, shim_sin, shim_cos)
1371 stress_cpu_int_fp(uint32_t, 32, long double, longdouble,
1372 0x1ce9b547UL, 0xa24b33aUL,
1373 C1, C2, C3, shim_sinl, shim_cosl)
1374 stress_cpu_int_fp(uint64_t, 64, float, float,
1375 0x13f7f6dc1d79197cULL, 0x1863d2c6969a51ceULL,
1376 C1, C2, C3, shim_sinf, shim_cosf)
1377 stress_cpu_int_fp(uint64_t, 64, double, double,
1378 0x13f7f6dc1d79197cULL, 0x1863d2c6969a51ceULL,
1379 C1, C2, C3, shim_sin, shim_cos)
1380 stress_cpu_int_fp(uint64_t, 64, long double, longdouble,
1381 0x13f7f6dc1d79197cULL, 0x1863d2c6969a51ceULL,
1382 C1, C2, C3, shim_sinl, shim_cosl)
1383
1384 #if defined(HAVE_INT128_T)
1385 stress_cpu_int_fp(__uint128_t, 128, float, float,
1386 _UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
1387 _UINT128(0x0062f086e6160e4e,0x0d84c9f800365858),
1388 _UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3),
1389 shim_sinf, shim_cosf)
1390 stress_cpu_int_fp(__uint128_t, 128, double, double,
1391 _UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
1392 _UINT128(0x0062f086e6160e4e,0x0d84c9f800365858),
1393 _UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3),
1394 shim_sin, shim_cos)
1395 stress_cpu_int_fp(__uint128_t, 128, long double, longdouble,
1396 _UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
1397 _UINT128(0x0062f086e6160e4e,0x0d84c9f800365858),
1398 _UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3),
1399 shim_sinl, shim_cosl)
1400 #if defined(HAVE_FLOAT_DECIMAL32) && \
1401 !defined(__clang__)
1402 stress_cpu_int_fp(__uint128_t, 128, _Decimal32, decimal32,
1403 _UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
1404 _UINT128(0x0062f086e6160e4e,0x0d84c9f800365858),
1405 _UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3),
1406 (_Decimal32)shim_sinf, (_Decimal32)shim_cosf)
1407 #endif
1408 #if defined(HAVE_FLOAT_DECIMAL64) && \
1409 !defined(__clang__)
1410 stress_cpu_int_fp(__uint128_t, 128, _Decimal64, decimal64,
1411 _UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
1412 _UINT128(0x0062f086e6160e4e,0x0d84c9f800365858),
1413 _UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3),
1414 (_Decimal64)shim_sin, (_Decimal64)shim_cos)
1415 #endif
1416 #if defined(HAVE_FLOAT_DECIMAL128) && \
1417 !defined(__clang__)
1418 stress_cpu_int_fp(__uint128_t, 128, _Decimal128, decimal128,
1419 _UINT128(0x132af604d8b9183a,0x5e3af8fa7a663d74),
1420 _UINT128(0x0062f086e6160e4e,0x0d84c9f800365858),
1421 _UINT128(C1, C1), _UINT128(C2, C2), _UINT128(C3, C3),
1422 (_Decimal128)shim_sinl, (_Decimal128)shim_cosl)
1423 #endif
1424 #endif
1425
1426 /*
1427 * stress_cpu_rgb()
1428 * CCIR 601 RGB to YUV to RGB conversion
1429 */
1430 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_rgb(const char *name)
1431 {
1432 int i;
1433 uint32_t rgb = stress_mwc32() & 0xffffff;
1434 uint8_t r = (uint8_t)(rgb >> 16);
1435 uint8_t g = (uint8_t)(rgb >> 8);
1436 uint8_t b = (uint8_t)rgb;
1437
1438 (void)name;
1439
1440 /* Do a 1000 colours starting from the rgb seed */
1441 for (i = 0; i < 1000; i++) {
1442 float y, u, v;
1443
1444 /* RGB to CCIR 601 YUV */
1445 y = (0.299f * r) + (0.587f * g) + (0.114f * b);
1446 u = (b - y) * 0.565f;
1447 v = (r - y) * 0.713f;
1448
1449 /* YUV back to RGB */
1450 r = (uint8_t)(y + (1.403f * v));
1451 g = (uint8_t)(y - (0.344f * u) - (0.714f * v));
1452 b = (uint8_t)(y + (1.770f * u));
1453
1454 /* And bump each colour to make next round */
1455 r += 1;
1456 g += 2;
1457 b += 3;
1458 stress_uint64_put(r + g + b);
1459 }
1460 }
1461
1462 /*
1463 * stress_cpu_matrix_prod(void)
1464 * matrix product
1465 */
stress_cpu_matrix_prod(const char * name)1466 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_matrix_prod(const char *name)
1467 {
1468 int i, j, k;
1469
1470 static long double a[MATRIX_PROD_SIZE][MATRIX_PROD_SIZE],
1471 b[MATRIX_PROD_SIZE][MATRIX_PROD_SIZE],
1472 r[MATRIX_PROD_SIZE][MATRIX_PROD_SIZE];
1473 long double v = 1 / (long double)((uint32_t)~0);
1474 long double sum = 0.0L;
1475
1476 (void)name;
1477
1478 for (i = 0; i < MATRIX_PROD_SIZE; i++) {
1479 for (j = 0; j < MATRIX_PROD_SIZE; j++) {
1480 const uint32_t r1 = stress_mwc32();
1481 const uint32_t r2 = stress_mwc32();
1482
1483 a[i][j] = (long double)r1 * v;
1484 b[i][j] = (long double)r2 * v;
1485 r[i][j] = 0.0L;
1486 }
1487 }
1488
1489 for (i = 0; i < MATRIX_PROD_SIZE; i++) {
1490 for (j = 0; j < MATRIX_PROD_SIZE; j++) {
1491 for (k = 0; k < MATRIX_PROD_SIZE; k++) {
1492 r[i][j] += a[i][k] * b[k][j];
1493 }
1494 }
1495 }
1496
1497 for (i = 0; i < MATRIX_PROD_SIZE; i++)
1498 for (j = 0; j < MATRIX_PROD_SIZE; j++)
1499 sum += r[i][j];
1500 stress_long_double_put(sum);
1501 }
1502
1503 /*
1504 * stress_cpu_fibonacci()
1505 * compute fibonacci series
1506 */
stress_cpu_fibonacci(const char * name)1507 static void HOT OPTIMIZE3 stress_cpu_fibonacci(const char *name)
1508 {
1509 const uint64_t fn_res = 0xa94fad42221f2702ULL;
1510 register uint64_t f1 = 0, f2 = 1, fn;
1511
1512 do {
1513 fn = f1 + f2;
1514 f1 = f2;
1515 f2 = fn;
1516 } while (!(fn & 0x8000000000000000ULL));
1517
1518 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (fn_res != fn))
1519 pr_fail("%s: fibonacci error detected, summation "
1520 "or assignment failure\n", name);
1521 }
1522
1523 /*
1524 * stress_cpu_psi
1525 * compute the constant psi,
1526 * the reciprocal Fibonacci constant
1527 */
stress_cpu_psi(const char * name)1528 static void HOT OPTIMIZE3 stress_cpu_psi(const char *name)
1529 {
1530 long double f1 = 0.0L, f2 = 1.0L;
1531 long double psi = 0.0L, last_psi;
1532 long double precision = 1.0e-20L;
1533 int i = 0;
1534 const int max_iter = 100;
1535
1536 do {
1537 long double fn = f1 + f2;
1538 f1 = f2;
1539 f2 = fn;
1540 last_psi = psi;
1541 psi += 1.0L / f1;
1542 i++;
1543 } while ((i < max_iter) && (shim_fabsl(psi - last_psi) > precision));
1544
1545 if (g_opt_flags & OPT_FLAGS_VERIFY) {
1546 if (shim_fabsl(psi - PSI) > 1.0e-15L)
1547 pr_fail("%s: calculation of reciprocal "
1548 "Fibonacci constant phi not as accurate "
1549 "as expected\n", name);
1550 if (i >= max_iter)
1551 pr_fail("%s: calculation of reciprocal "
1552 "Fibonacci constant took more iterations "
1553 "than expected\n", name);
1554 }
1555
1556 stress_long_double_put(psi);
1557 }
1558
1559 /*
1560 * stress_cpu_ln2
1561 * compute ln(2) using series
1562 */
stress_cpu_ln2(const char * name)1563 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_ln2(const char *name)
1564 {
1565 long double ln2 = 0.0L, last_ln2 = 0.0L;
1566 long double precision = 1.0e-7L;
1567 register int n = 1;
1568 const int max_iter = 10000;
1569
1570 /* Not the fastest converging series */
1571 do {
1572 last_ln2 = ln2;
1573 /* Unroll, do several ops */
1574 ln2 += (long double)1.0L / (long double)n++;
1575 ln2 -= (long double)1.0L / (long double)n++;
1576 ln2 += (long double)1.0L / (long double)n++;
1577 ln2 -= (long double)1.0L / (long double)n++;
1578 ln2 += (long double)1.0L / (long double)n++;
1579 ln2 -= (long double)1.0L / (long double)n++;
1580 ln2 += (long double)1.0L / (long double)n++;
1581 ln2 -= (long double)1.0L / (long double)n++;
1582 } while ((n < max_iter) && (shim_fabsl(ln2 - last_ln2) > precision));
1583
1584 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (n >= max_iter))
1585 pr_fail("%s: calculation of ln(2) took more "
1586 "iterations than expected\n", name);
1587
1588 stress_long_double_put(ln2);
1589 }
1590
/*
 * ackermann()
 *	naive, fully recursive Ackermann function:
 *	  A(0, n) = n + 1
 *	  A(m, 0) = A(m - 1, 1)
 *	  A(m, n) = A(m - 1, A(m, n - 1))
 */
static uint32_t HOT ackermann(const uint32_t m, const uint32_t n)
{
	if (m != 0) {
		/* Second argument for the outer call */
		const uint32_t inner = (n != 0) ? ackermann(m, n - 1) : 1;

		return ackermann(m - 1, inner);
	}
	return n + 1;
}
1604
1605 /*
1606 * stress_cpu_ackermann
1607 * compute ackermann function
1608 */
stress_cpu_ackermann(const char * name)1609 static void stress_cpu_ackermann(const char *name)
1610 {
1611 uint32_t a = ackermann(3, 7);
1612
1613 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (a != 0x3fd))
1614 pr_fail("%s: ackermann error detected, "
1615 "ackermann(3,9) miscalculated\n", name);
1616 }
1617
1618 /*
1619 * stress_cpu_explog
1620 * compute exp(log(n))
1621 */
stress_cpu_explog(const char * name)1622 static void HOT stress_cpu_explog(const char *name)
1623 {
1624 uint32_t i;
1625 double n = 1e6;
1626
1627 (void)name;
1628
1629 for (i = 1; i < 100000; i++)
1630 n = exp(log(n) / 1.00002);
1631 }
1632
/*
 * This could be a ternary operator, v = (v op val) ? a : b
 * but it may be optimised down, so force a compare and jmp
 * with -O0 and a if/else construct
 *
 * Besides its own arguments the macro reads the variables
 * `next` and `i` from the scope it is expanded in and passes
 * their sum to stress_uint32_put().
 */
#define JMP(v, op, val, a, b)				\
do {							\
	if (v op val)					\
		v = a;					\
	else						\
		v = b;					\
	stress_uint32_put((uint32_t)(next + i));	\
} while (0)

/*
 * stress_cpu_jmp
 *	jmp conditionals: 999 passes over a fixed sequence of twelve
 *	compare-and-assign steps on `next`.  The function is built with
 *	OPTIMIZE0 so that the if/else form of JMP() is kept.  The name
 *	argument is unused.
 */
static void HOT OPTIMIZE0 stress_cpu_jmp(const char *name)
{
	register int i, next = 0;

	(void)name;

	for (i = 1; i < 1000; i++) {
		/* Force lots of compare jmps */
		JMP(next, ==, 1, 2, 3);
		JMP(next, >, 2, 0, 1);
		JMP(next, <, 1, 1, 0);
		JMP(next, ==, 1, 2, 3);
		JMP(next, >, 2, 0, 1);
		JMP(next, <, 1, 1, 0);
		JMP(next, ==, 1, 2, 3);
		JMP(next, >, 2, 0, 1);
		JMP(next, <, 1, 1, 0);
		JMP(next, ==, 1, 2, 3);
		JMP(next, >, 2, 0, 1);
		JMP(next, <, 1, 1, 0);
	}
}
1673
/*
 * ccitt_crc16()
 *	naive bit-at-a-time CCITT CRC16 over n bytes of data.
 *
 *	The CCITT polynomial x^16 + x^12 + x^5 + 1 is 0x11021; it is
 *	used here in bit-reversed form, 0x8408, with the always-set top
 *	bit dropped so that it fits in 16 bits.  The register starts at
 *	0xffff, is inverted at the end and is returned with its two
 *	bytes swapped.  A zero length returns 0.
 */
static uint16_t HOT OPTIMIZE3 ccitt_crc16(const uint8_t *data, size_t n)
{
	const uint16_t reversed_poly = 0x8408;
	register uint16_t crc = 0xffff;

	if (n == 0)
		return 0;

	while (n > 0) {
		uint8_t octet = (uint16_t)0xff & *data++;
		uint8_t bits_left = 8;

		while (bits_left > 0) {
			/* Does the incoming bit differ from the CRC's low bit? */
			const bool differs = 1 & (octet ^ crc);

			crc >>= 1;
			if (differs)
				crc ^= reversed_poly;
			octet >>= 1;
			bits_left--;
		}
		n--;
	}

	crc = ~crc;
	return ((uint16_t)(crc << 8)) | (crc >> 8);
}
1712
/*
 * stress_cpu_crc16
 *	fill a 1024 byte buffer with random data and compute the CCITT
 *	CRC16 of every leading portion of it from 1 byte up to
 *	sizeof(buffer) - 1 bytes, passing each CRC to
 *	stress_uint64_put().
 */
static void stress_cpu_crc16(const char *name)
{
	uint8_t buffer[1024];
	size_t len = 1;

	(void)name;

	random_buffer(buffer, sizeof(buffer));
	while (len < sizeof(buffer)) {
		stress_uint64_put(ccitt_crc16(buffer, len));
		len++;
	}
}
1728
/*
 * fletcher16
 *	naive Fletcher-16 checksum: two running sums, each reduced
 *	modulo 255 after every byte.  The second sum is returned in the
 *	high byte and the first in the low byte.
 */
static uint16_t HOT OPTIMIZE3 fletcher16(const uint8_t *data, const size_t len)
{
	register uint16_t low = 0, high = 0;
	register size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		low = (low + *data++) % 255;
		high = (high + low) % 255;
	}
	return ((uint16_t)(high << 8)) | low;
}
1744
/*
 * stress_cpu_fletcher16()
 *	fill a 1024 byte buffer with random data and compute the
 *	fletcher16 checksum of every leading portion of it from 1 byte
 *	up to sizeof(buffer) - 1 bytes, passing each checksum to
 *	stress_uint16_put().
 */
static void stress_cpu_fletcher16(const char *name)
{
	uint8_t buffer[1024];
	size_t len = 1;

	(void)name;

	random_buffer((uint8_t *)buffer, sizeof(buffer));
	while (len < sizeof(buffer)) {
		stress_uint16_put(fletcher16(buffer, len));
		len++;
	}
}
1760
/*
 * stress_cpu_ipv4checksum
 *	fill a buffer of 512 16 bit words with random data and call
 *	stress_ipv4_checksum() on it with a size argument stepping from
 *	1 up to sizeof(buffer) - 1, passing each checksum to
 *	stress_uint16_put().
 */
static void stress_cpu_ipv4checksum(const char *name)
{
	uint16_t buffer[512];
	size_t len = 1;

	(void)name;

	random_buffer((uint8_t *)buffer, sizeof(buffer));
	while (len < sizeof(buffer)) {
		stress_uint16_put(stress_ipv4_checksum(buffer, len));
		len++;
	}
}
1776
1777 #if defined(HAVE_COMPLEX_H) && \
1778 defined(HAVE_COMPLEX) && \
1779 defined(__STDC_IEC_559_COMPLEX__) &&\
1780 !defined(__UCLIBC__)
1781 /*
1782 * zeta()
1783 * Riemann zeta function
1784 */
zeta(const long double complex s,long double precision)1785 static inline long double complex HOT OPTIMIZE3 zeta(
1786 const long double complex s,
1787 long double precision)
1788 {
1789 int i = 1;
1790 long double complex z = 0.0L, zold = 0.0L;
1791
1792 do {
1793 double complex pwr = shim_cpow(i++, (complex double)s);
1794 zold = z;
1795 z += 1 / (long double complex)pwr;
1796 } while (shim_cabsl(z - zold) > precision);
1797
1798 return z;
1799 }
1800
/*
 * stress_cpu_zeta()
 *	stress test Zeta(2.0)..Zeta(10.0); each result is converted to
 *	long double and passed to stress_long_double_put().
 */
static void stress_cpu_zeta(const char *name)
{
	const long double precision = 0.00000001L;
	int s = 2;

	(void)name;

	while (s < 11) {
		const long double complex z = zeta((long double complex)s, precision);

		stress_long_double_put((long double)z);
		s++;
	}
}
1818 #endif
1819
1820 /*
1821 * stress_cpu_gamma()
1822 * stress Euler–Mascheroni constant gamma
1823 */
stress_cpu_gamma(const char * name)1824 static void HOT OPTIMIZE3 stress_cpu_gamma(const char *name)
1825 {
1826 long double precision = 1.0e-10L;
1827 long double sum = 0.0L, k = 1.0L, _gamma = 0.0L, gammaold;
1828
1829 do {
1830 gammaold = _gamma;
1831 sum += 1.0L / k;
1832 _gamma = sum - shim_logl(k);
1833 k += 1.0L;
1834 } while ((k < 1e6L) && shim_fabsl(_gamma - gammaold) > precision);
1835
1836 stress_long_double_put(_gamma);
1837
1838 if (g_opt_flags & OPT_FLAGS_VERIFY) {
1839 if (shim_fabsl(_gamma - GAMMA) > 1.0e-5L)
1840 pr_fail("%s: calculation of Euler-Mascheroni "
1841 "constant not as accurate as expected\n", name);
1842 if (k > 80000.0L)
1843 pr_fail("%s: calculation of Euler-Mascheroni "
1844 "constant took more iterations than "
1845 "expected\n", name);
1846 }
1847
1848 }
1849
1850 /*
1851 * stress_cpu_correlate()
1852 *
1853 * Introduction to Signal Processing,
1854 * Prentice-Hall, 1995, ISBN: 0-13-209172-0.
1855 */
stress_cpu_correlate(const char * name)1856 static void HOT OPTIMIZE3 stress_cpu_correlate(const char *name)
1857 {
1858 size_t i, j;
1859 double data_average = 0.0;
1860 static double data[CORRELATE_DATA_LEN];
1861 static double corr[CORRELATE_LEN + 1];
1862
1863 (void)name;
1864
1865 /* Generate some random data */
1866 for (i = 0; i < CORRELATE_DATA_LEN; i++) {
1867 const uint64_t r = stress_mwc64();
1868
1869 data[i] = (double)r;
1870 data_average += data[i];
1871 }
1872 data_average /= (double)CORRELATE_DATA_LEN;
1873
1874 /* And correlate */
1875 for (i = 0; i <= CORRELATE_LEN; i++) {
1876 corr[i] = 0.0;
1877 for (j = 0; j < CORRELATE_DATA_LEN - i; j++) {
1878 corr[i] += (data[i + j] - data_average) *
1879 (data[j] - data_average);
1880 }
1881 corr[i] /= (double)CORRELATE_LEN;
1882 stress_double_put(corr[i]);
1883 }
1884 }
1885
1886
1887 /*
1888 * stress_cpu_sieve()
1889 * slightly optimised Sieve of Eratosthenes
1890 */
stress_cpu_sieve(const char * name)1891 static void HOT OPTIMIZE3 stress_cpu_sieve(const char *name)
1892 {
1893 const double dsqrt = shim_sqrt(SIEVE_SIZE);
1894 const uint32_t nsqrt = (uint32_t)dsqrt;
1895 static uint32_t sieve[(SIEVE_SIZE + 31) / 32];
1896 uint32_t i, j;
1897
1898 (void)memset(sieve, 0xff, sizeof(sieve));
1899 for (i = 2; i < nsqrt; i++)
1900 if (STRESS_GETBIT(sieve, i))
1901 for (j = i * i; j < SIEVE_SIZE; j += i)
1902 STRESS_CLRBIT(sieve, j);
1903
1904 /* And count up number of primes */
1905 for (j = 0, i = 2; i < SIEVE_SIZE; i++) {
1906 if (STRESS_GETBIT(sieve, i))
1907 j++;
1908 }
1909 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (j != 10000))
1910 pr_fail("%s: sieve error detected, number of "
1911 "primes has been miscalculated\n", name);
1912 }
1913
1914 /*
1915 * is_prime()
1916 * return true if n is prime
1917 * http://en.wikipedia.org/wiki/Primality_test
1918 */
is_prime(uint32_t n)1919 static inline HOT OPTIMIZE3 ALWAYS_INLINE uint32_t is_prime(uint32_t n)
1920 {
1921 register uint32_t i, max;
1922 double dsqrt;
1923
1924 if (UNLIKELY(n <= 3))
1925 return n >= 2;
1926 if ((n % 2 == 0) || (n % 3 == 0))
1927 return 0;
1928
1929 dsqrt = shim_sqrt(n);
1930 max = (uint32_t)dsqrt + 1;
1931 for (i = 5; i < max; i+= 6)
1932 if ((n % i == 0) || (n % (i + 2) == 0))
1933 return 0;
1934 return 1;
1935 }
1936
1937 /*
1938 * stress_cpu_prime()
1939 *
1940 */
stress_cpu_prime(const char * name)1941 static void stress_cpu_prime(const char *name)
1942 {
1943 uint32_t i, nprimes = 0;
1944
1945 for (i = 0; i < SIEVE_SIZE; i++) {
1946 nprimes += is_prime(i);
1947 }
1948
1949 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (nprimes != 10000))
1950 pr_fail("%s: prime error detected, number of primes "
1951 "has been miscalculated\n", name);
1952 }
1953
1954 /*
1955 * stress_cpu_gray()
1956 * compute gray codes
1957 */
stress_cpu_gray(const char * name)1958 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_gray(const char *name)
1959 {
1960 register uint32_t i;
1961 register uint64_t sum = 0;
1962
1963 for (i = 0; i < 0x10000; i++) {
1964 register uint32_t gray_code;
1965
1966 /* Binary to Gray code */
1967 gray_code = (i >> 1) ^ i;
1968 sum += gray_code;
1969
1970 /* Gray code back to binary */
1971 #if 0
1972 {
1973 /* Slow iterative method */
1974 register uint32_t mask;
1975
1976 for (mask = gray_code >> 1; mask; mask >>= 1)
1977 gray_code ^= mask;
1978 }
1979 #else
1980 /* Fast non-loop method */
1981 gray_code ^= (gray_code >> 1);
1982 gray_code ^= (gray_code >> 2);
1983 gray_code ^= (gray_code >> 4);
1984 gray_code ^= (gray_code >> 8);
1985 gray_code ^= (gray_code >> 16);
1986 #endif
1987 sum += gray_code;
1988 }
1989 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (sum != 0xffff0000))
1990 pr_fail("%s: gray code error detected, sum of gray "
1991 "codes between 0x00000 and 0x10000 miscalculated\n",
1992 name);
1993 }
1994
/*
 * hanoi()
 *	recursive Towers of Hanoi.  No pegs are modelled; the function
 *	only counts, returning 1 for n == 0 and otherwise the sum of
 *	the two recursive calls for n - 1 discs.
 */
static uint32_t HOT hanoi(
	const uint16_t n,
	const char p1,
	const char p2,
	const char p3)
{
	uint32_t moves;

	if (UNLIKELY(n == 0))
		return 1;	/* Move p1 -> p2 */

	moves = hanoi(n - 1, p1, p3, p2);
	/* Move p1 -> p2 */
	moves += hanoi(n - 1, p3, p2, p1);
	return moves;
}
2015
2016 /*
2017 * stress_cpu_hanoi
2018 * stress with recursive Towers of Hanoi
2019 */
stress_cpu_hanoi(const char * name)2020 static void stress_cpu_hanoi(const char *name)
2021 {
2022 uint32_t n = hanoi(20, 'X', 'Y', 'Z');
2023
2024 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (n != 1048576))
2025 pr_fail("%s: number of hanoi moves different from "
2026 "the expected number\n", name);
2027
2028 stress_uint64_put(n);
2029 }
2030
/*
 * stress_floatconversion
 *	exercise conversion to/from different floating point values
 *
 *	For every i in 0..65535 the value is pushed through chains of
 *	casts between float, double and long double (and once back to
 *	uint32_t).  The converted values are accumulated into one sum
 *	per type and the four sums are passed to the stress_*_put()
 *	functions at the end.  The name argument is unused.
 */
static void TARGET_CLONES stress_cpu_floatconversion(const char *name)
{
	float f_sum = 0.0;
	double d_sum = 0.0;
	long double ld_sum = 0.0L;
	register uint32_t i, j_sum = 0;

	(void)name;

	for (i = 0; i < 65536; i++) {
		float f;
		double d;
		long double ld;

		/* Widen step by step: integer -> float -> double -> long double */
		f = (float)i;
		d = (double)f;
		ld = (long double)d;

		f_sum += f;
		d_sum += d;
		ld_sum += ld;
		/* ... and back to an integer again */
		j_sum += (uint32_t)ld;

		/* Integer to float via the wider types */
		f = (float)(double)i;
		f_sum += f;
		f = (float)(long double)i;
		f_sum += f;
		f = (float)(double)(long double)i;
		f_sum += f;
		f = (float)(long double)(double)i;
		f_sum += f;

		/* Float (the last f computed above) to double via other types */
		d = (double)(long double)f;
		d_sum += d;
		d = (double)(float)f;
		d_sum += d;
		d = (double)(long double)(float)f;
		d_sum += d;
		d = (double)(float)(long double)f;
		d_sum += d;

		/* Double (the last d computed above) to long double via other types */
		ld = (long double)(float)d;
		ld_sum += ld;
		ld = (long double)(double)d;
		ld_sum += ld;
		ld = (long double)(float)(double)d;
		ld_sum += ld;
		ld = (long double)(double)(float)d;
		ld_sum += ld;
	}
	stress_long_double_put(ld_sum);
	stress_double_put(d_sum);
	stress_float_put(f_sum);
	stress_uint32_put(j_sum);
}
2090
/*
 * stress_intconversion
 *	exercise conversion to/from different int values
 *
 *	Three signed accumulators (16, 32 and 64 bit) are seeded with
 *	random values.  For every i in 0..65535 each accumulator is
 *	first bumped by i and then repeatedly adjusted by a value
 *	derived from one of the other accumulators through a chain of
 *	negations and casts between signed and unsigned types of
 *	different widths.  The final sums are passed to the
 *	stress_uint*_put() functions.  The name argument is unused.
 *
 *	NOTE(review): the 32 and 64 bit accumulators use signed
 *	arithmetic on randomly seeded values and several chains negate
 *	a signed value; nothing here guards against signed overflow --
 *	confirm whether that is acceptable for the build flags in use.
 */
static void stress_cpu_intconversion(const char *name)
{
	int16_t i16_sum = (int16_t)stress_mwc16();
	int32_t i32_sum = (int32_t)stress_mwc32();
	int64_t i64_sum = (int64_t)stress_mwc64();

	register uint32_t i;

	(void)name;

	for (i = 0; i < 65536; i++) {
		int16_t i16;
		int32_t i32;
		int64_t i64;

		/* Plain narrowing/widening of the loop counter */
		i16 = (int16_t)i;
		i32 = (int32_t)i;
		i64 = (int64_t)i;

		i16_sum += i16;
		i32_sum += i32;
		i64_sum += i64;

		/*
		 * Each group below feeds i64_sum into i16, i16_sum into
		 * i32 and i32_sum into i64, using a different cast chain
		 * per group and alternating between subtracting and
		 * adding the result.
		 */
		i16 = -(int16_t)(uint32_t)-(int64_t)(uint64_t)i64_sum;
		i16_sum -= i16;
		i32 = -(int16_t)(uint32_t)-(int64_t)(uint64_t)i16_sum;
		i32_sum -= i32;
		i64 = -(int16_t)(uint32_t)-(int64_t)(uint64_t)i32_sum;
		i64_sum -= i64;

		i16 = -(int16_t)(uint64_t)-(int32_t)(uint64_t)i64_sum;
		i16_sum += i16;
		i32 = -(int16_t)(uint64_t)-(int32_t)(uint64_t)i16_sum;
		i32_sum += i32;
		i64 = -(int16_t)(uint64_t)-(int32_t)(uint64_t)i32_sum;
		i64_sum += i64;

		i16 = (int16_t)-((int32_t)(uint16_t)-(int64_t)(uint64_t)i64_sum);
		i16_sum -= i16;
		i32 = -(int32_t)(uint16_t)-(int64_t)(uint64_t)i16_sum;
		i32_sum -= i32;
		i64 = -(int32_t)(uint16_t)-(int64_t)(uint64_t)i32_sum;
		i64_sum -= i64;

		i16 = (int16_t)-((int32_t)(uint64_t)-(int16_t)(uint64_t)i64_sum);
		i16_sum += i16;
		i32 = -(int32_t)(uint64_t)-(int16_t)(uint64_t)i16_sum;
		i32_sum += i32;
		i64 = -(int32_t)(uint64_t)-(int16_t)(uint64_t)i32_sum;
		i64_sum += i64;

		i16 = (int16_t)-((int64_t)(uint16_t)-(int32_t)(uint64_t)i64_sum);
		i16_sum -= i16;
		i32 = (int32_t)-((int64_t)(uint16_t)-(int32_t)(uint64_t)i16_sum);
		i32_sum -= i32;
		/* Unlike its siblings this chain has no leading negation */
		i64 = (int64_t)(uint16_t)-(int32_t)(uint64_t)i32_sum;
		i64_sum -= i64;

		i16 = (int16_t)-((int64_t)(uint32_t)-(int16_t)(uint64_t)i64_sum);
		i16_sum += i16;
		i32 = (int32_t)-((int64_t)(uint32_t)-(int16_t)(uint64_t)i16_sum);
		i32_sum += i32;
		i64 = -(int64_t)(uint32_t)-(int16_t)(uint64_t)i32_sum;
		i64_sum += i64;
	}
	stress_uint16_put((uint16_t)i16_sum);
	stress_uint32_put((uint32_t)i32_sum);
	stress_uint64_put((uint64_t)i64_sum);
}
2164
2165 /*
2166 * factorial()
2167 * compute n!
2168 */
factorial(int n)2169 static inline long double HOT OPTIMIZE3 factorial(int n)
2170 {
2171 static const long double factorials[] = {
2172 1.0L,
2173 1.0L,
2174 2.0L,
2175 6.0L,
2176 24.0L,
2177 120.0L,
2178 720.0L,
2179 5040.0L,
2180 40320.0L,
2181 362880.0L,
2182 3628800.0L,
2183 39916800.0L,
2184 479001600.0L,
2185 6227020800.0L,
2186 87178291200.0L,
2187 1307674368000.0L,
2188 20922789888000.0L,
2189 355687428096000.0L,
2190 6402373705728000.0L,
2191 121645100408832000.0L,
2192 2432902008176640000.0L,
2193 51090942171709440000.0L,
2194 1124000727777607680000.0L,
2195 25852016738884976640000.0L,
2196 620448401733239439360000.0L,
2197 15511210043330985984000000.0L,
2198 403291461126605635592388608.0L,
2199 10888869450418352161430700032.0L,
2200 304888344611713860511469666304.0L,
2201 8841761993739701954695181369344.0L,
2202 265252859812191058647452510846976.0L,
2203 8222838654177922818071027836256256.0L,
2204 263130836933693530178272890760200192.0L
2205 };
2206
2207 if (n < (int)SIZEOF_ARRAY(factorials))
2208 return factorials[n];
2209
2210 return roundl(shim_expl(shim_lgammal((long double)(n + 1))));
2211 }
2212
2213 /*
2214 * stress_cpu_pi()
2215 * compute pi using the Srinivasa Ramanujan
2216 * fast convergence algorithm
2217 */
stress_cpu_pi(const char * name)2218 static void HOT OPTIMIZE3 stress_cpu_pi(const char *name)
2219 {
2220 long double s = 0.0L, pi = 0.0L, last_pi = 0.0L;
2221 const long double precision = 1.0e-20L;
2222 const long double c = 2.0L * shim_sqrtl(2.0L) / 9801.0L;
2223 const int max_iter = 5;
2224 int k = 0;
2225
2226 do {
2227 last_pi = pi;
2228 s += (factorial(4 * k) *
2229 ((26390.0L * (long double)k) + 1103)) /
2230 (shim_powl(factorial(k), 4.0L) * shim_powl(396.0L, 4.0L * k));
2231 pi = 1 / (s * c);
2232 k++;
2233 } while ((k < max_iter) && (shim_fabsl(pi - last_pi) > precision));
2234
2235 /* Quick sanity checks */
2236 if (g_opt_flags & OPT_FLAGS_VERIFY) {
2237 if (k >= max_iter)
2238 pr_fail("%s: number of iterations to compute "
2239 "pi was more than expected\n", name);
2240 if (shim_fabsl(pi - PI) > 1.0e-15L)
2241 pr_fail("%s: accuracy of computed pi is not "
2242 "as good as expected\n", name);
2243 }
2244
2245 stress_long_double_put(pi);
2246 }
2247
2248 /*
2249 * stress_cpu_omega()
2250 * compute the constant omega
2251 * See http://en.wikipedia.org/wiki/Omega_constant
2252 */
stress_cpu_omega(const char * name)2253 static void HOT OPTIMIZE3 stress_cpu_omega(const char *name)
2254 {
2255 long double omega = 0.5L, last_omega = 0.0L;
2256 const long double precision = 1.0e-20L;
2257 const int max_iter = 6;
2258 int n = 0;
2259
2260 /*
2261 * Omega converges very quickly, on most CPUs it is
2262 * within 6 iterations.
2263 */
2264 do {
2265 last_omega = omega;
2266 omega = (1 + omega) / (1 + shim_expl(omega));
2267 n++;
2268 } while ((n < max_iter) && (shim_fabsl(omega - last_omega) > precision));
2269
2270 if (g_opt_flags & OPT_FLAGS_VERIFY) {
2271 if (n > max_iter)
2272 pr_fail("%s: number of iterations to compute "
2273 "omega was more than expected (%d vs %d)\n",
2274 name, n, max_iter);
2275 if (shim_fabsl(omega - OMEGA) > 1.0e-16L)
2276 pr_fail("%s: accuracy of computed omega is "
2277 "not as good as expected\n", name);
2278 }
2279
2280 stress_long_double_put(omega);
2281 }
2282
/*
 * HAMMING()
 *	compute bit `i` of the code word: the parity (xor) of bit `i`
 *	of each generator row whose matching data bit is set in
 *	`nybble`, then xor that bit into `code`.
 */
#define HAMMING(G, i, nybble, code)				\
do {								\
	int8_t parity;						\
								\
	parity = (((G[3] >> i) & (nybble >> 3)) & 1) ^		\
		 (((G[2] >> i) & (nybble >> 2)) & 1) ^		\
		 (((G[1] >> i) & (nybble >> 1)) & 1) ^		\
		 (((G[0] >> i) & (nybble >> 0)) & 1);		\
	code ^= ((parity & 1) << i);				\
} while (0)

/*
 * hamming84()
 *	compute the Hamming (8,4) code word for the low four bits of
 *	nybble
 */
static uint8_t HOT OPTIMIZE3 hamming84(const uint8_t nybble)
{
	/*
	 *  Hamming (8,4) Generator matrix
	 *  (4 parity bits, 4 data bits)
	 *
	 *  p1 p2 p3 p4 d1 d2 d3 d4
	 *  0  1  1  1  1  0  0  0
	 *  1  0  1  1  0  1  0  0
	 *  1  1  0  1  0  0  1  0
	 *  1  1  1  0  0  0  0  1
	 *
	 *  Where:
	 *   d1..d4 = 4 data bits
	 *   p1..p4 = 4 parity bits:
	 *    p1 = d2 + d3 + d4
	 *    p2 = d1 + d3 + d4
	 *    p3 = d1 + d2 + d4
	 *    p4 = d1 + d2 + d3
	 *
	 *  The rows are stored in reverse so that G[j] can be used
	 *  where G[3 - j] would otherwise be needed, saving a
	 *  subtraction.
	 */
	static const uint8_t G[] = {
		0xf1,	/* 0b11110001 */
		0xd2,	/* 0b11010010 */
		0xb4,	/* 0b10110100 */
		0x78,	/* 0b01111000 */
	};

	register uint8_t code = 0;

	/* One unrolled step per output bit, each xoring four rows */
	HAMMING(G, 0, nybble, code);
	HAMMING(G, 1, nybble, code);
	HAMMING(G, 2, nybble, code);
	HAMMING(G, 3, nybble, code);
	HAMMING(G, 4, nybble, code);
	HAMMING(G, 5, nybble, code);
	HAMMING(G, 6, nybble, code);
	HAMMING(G, 7, nybble, code);

	return code;
}
2340
2341 /*
2342 * stress_cpu_hamming()
2343 * compute hamming code on 65536 x 4 nybbles
2344 */
stress_cpu_hamming(const char * name)2345 static void HOT OPTIMIZE3 TARGET_CLONES stress_cpu_hamming(const char *name)
2346 {
2347 uint32_t i;
2348 uint32_t sum = 0;
2349
2350 for (i = 0; i < 65536; i++) {
2351 uint32_t encoded;
2352
2353 /* 4 x 4 bits to 4 x 8 bits hamming encoded */
2354 encoded = (uint32_t)(hamming84((i >> 12) & 0xf) << 24) |
2355 (uint32_t)(hamming84((i >> 8) & 0xf) << 16) |
2356 (uint32_t)(hamming84((i >> 4) & 0xf) << 8) |
2357 (uint32_t)(hamming84((i >> 0) & 0xf) << 0);
2358 sum += encoded;
2359 }
2360
2361 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (sum != 0xffff8000))
2362 pr_fail("%s: hamming error detected, sum of 65536 "
2363 "hamming codes not correct\n", name);
2364 }
2365
2366
/*
 *  stress_cpu_callfunc_func()
 *	recurse n levels deep, passing every argument through
 *	unchanged; at the bottom return the distance (in uint64_t
 *	units) between the innermost by-value copy of u64arg and
 *	the location p_u64arg points to
 */
static ptrdiff_t stress_cpu_callfunc_func(
	ssize_t n,
	uint64_t u64arg,
	uint32_t u32arg,
	uint16_t u16arg,
	uint8_t u8arg,
	uint64_t *p_u64arg,
	uint32_t *p_u32arg,
	uint16_t *p_u16arg,
	uint8_t *p_u8arg)
{
	/* Recursing is the expected case, the bottom is hit only once */
	return LIKELY(n > 0) ?
		stress_cpu_callfunc_func(n - 1,
			u64arg, u32arg, u16arg, u8arg,
			p_u64arg, p_u32arg, p_u16arg, p_u8arg) :
		(&u64arg - p_u64arg);
}
2385
/*
 *  stress_cpu_callfunc()
 *	deep function calls: drive stress_cpu_callfunc_func() 1024
 *	levels deep with random arguments of several widths, passed
 *	both by value and by address
 */
static void stress_cpu_callfunc(const char *name)
{
	/* Random arguments, drawn widest first */
	uint64_t u64arg = stress_mwc64();
	uint32_t u32arg = stress_mwc32();
	uint16_t u16arg = stress_mwc16();
	uint8_t u8arg = stress_mwc8();

	(void)name;

	/* Feed the returned pointer distance to the uint64_t sink */
	stress_uint64_put((uint64_t)stress_cpu_callfunc_func(1024,
		u64arg, u32arg, u16arg, u8arg,
		&u64arg, &u32arg, &u16arg, &u8arg));
}
2406
2407
/*
 *  Parity lookup table generators. P2(n) yields the parities of the
 *  four 2 bit values when the parity of the bits above them is n;
 *  P4 and P6 widen that to 4 and 6 bits by flipping n for every
 *  extra set bit.
 */
#define P2(n)	(n), (n) ^ 1, (n) ^ 1, (n)
#define P4(n)	P2(n), P2((n) ^ 1), P2((n) ^ 1), P2(n)
#define P6(n)	P4(n), P4((n) ^ 1), P4((n) ^ 1), P4(n)

/* Entry v is true when the byte v has an odd number of set bits */
static const bool stress_cpu_parity_table[256] = {
	P6(0), P6(1), P6(1), P6(0)
};
2415
2416 /*
2417 * stress_cpu_parity
2418 * compute parity different ways
2419 */
stress_cpu_parity(const char * name)2420 static void stress_cpu_parity(const char *name)
2421 {
2422 uint32_t val = 0x83fb5acf;
2423 size_t i;
2424
2425 for (i = 0; i < 1000; i++, val++) {
2426 register uint32_t parity, p;
2427 uint32_t v;
2428 union {
2429 uint32_t v32;
2430 uint8_t v8[4];
2431 } u;
2432
2433 /*
2434 * Naive way
2435 */
2436 v = val;
2437 parity = 0;
2438 while (v) {
2439 if (v & 1)
2440 parity = !parity;
2441 v >>= 1;
2442 }
2443
2444 /*
2445 * Naive way with Brian Kernigan's bit counting optimisation
2446 * https://graphics.stanford.edu/~seander/bithacks.html
2447 */
2448 v = val;
2449 p = 0;
2450 while (v) {
2451 p = !p;
2452 v = v & (v - 1);
2453 }
2454 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (p != parity))
2455 pr_fail("%s: parity error detected, using "
2456 "optimised naive method\n", name);
2457
2458 /*
2459 * "Compute parity of a word with a multiply"
2460 * the Andrew Shapira method,
2461 * https://graphics.stanford.edu/~seander/bithacks.html
2462 */
2463 v = val;
2464 v ^= v >> 1;
2465 v ^= v >> 2;
2466 v = (v & 0x11111111U) * 0x11111111U;
2467 p = (v >> 28) & 1;
2468 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (p != parity))
2469 pr_fail("%s: parity error detected, using the "
2470 "multiply Shapira method\n", name);
2471
2472 /*
2473 * "Compute parity in parallel"
2474 * https://graphics.stanford.edu/~seander/bithacks.html
2475 */
2476 v = val;
2477 v ^= v >> 16;
2478 v ^= v >> 8;
2479 v ^= v >> 4;
2480 v &= 0xf;
2481 p = (0x6996 >> v) & 1;
2482 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (p != parity))
2483 pr_fail("%s: parity error detected, using "
2484 "the parallel method\n", name);
2485
2486 /*
2487 * "Compute parity by lookup table"
2488 * https://graphics.stanford.edu/~seander/bithacks.html
2489 * Variation #1
2490 */
2491 v = val;
2492 v ^= v >> 16;
2493 v ^= v >> 8;
2494 p = stress_cpu_parity_table[v & 0xff];
2495 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (p != parity))
2496 pr_fail("%s: parity error detected, using "
2497 "the lookup method, variation 1\n", name);
2498
2499 /*
2500 * "Compute parity by lookup table"
2501 * https://graphics.stanford.edu/~seander/bithacks.html
2502 * Variation #2
2503 */
2504 u.v32 = val;
2505 p = stress_cpu_parity_table[u.v8[0] ^ u.v8[1] ^ u.v8[2] ^ u.v8[3]];
2506 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (p != parity))
2507 pr_fail("%s: parity error detected, using the "
2508 "lookup method, variation 2\n", name);
2509 #if defined(HAVE_BUILTIN_PARITY)
2510 /*
2511 * Compute parity using built-in function
2512 */
2513 p = __builtin_parity((unsigned int)val);
2514 if ((g_opt_flags & OPT_FLAGS_VERIFY) && (p != parity))
2515 pr_fail("%s: parity error detected, using "
2516 "the __builtin_parity function\n", name);
2517 #endif
2518 }
2519 }
2520
2521 /*
2522 * stress_cpu_dither
2523 * perform 8 bit to 1 bit gray scale
2524 * Floyd–Steinberg dither
2525 */
stress_cpu_dither(const char * name)2526 static void TARGET_CLONES stress_cpu_dither(const char *name)
2527 {
2528 size_t x, y;
2529
2530 (void)name;
2531
2532 /*
2533 * Generate some random 8 bit image
2534 */
2535 for (y = 0; y < STRESS_CPU_DITHER_Y; y += 8) {
2536 for (x = 0; x < STRESS_CPU_DITHER_X; x ++) {
2537 uint64_t v = stress_mwc64();
2538
2539 pixels[x][y + 0] = (uint8_t)v;
2540 v >>= 8;
2541 pixels[x][y + 1] = (uint8_t)v;
2542 v >>= 8;
2543 pixels[x][y + 2] = (uint8_t)v;
2544 v >>= 8;
2545 pixels[x][y + 3] = (uint8_t)v;
2546 v >>= 8;
2547 pixels[x][y + 4] = (uint8_t)v;
2548 v >>= 8;
2549 pixels[x][y + 5] = (uint8_t)v;
2550 v >>= 8;
2551 pixels[x][y + 6] = (uint8_t)v;
2552 v >>= 8;
2553 pixels[x][y + 7] = (uint8_t)v;
2554 }
2555 }
2556
2557 /*
2558 * ..and dither
2559 */
2560 for (y = 0; y < STRESS_CPU_DITHER_Y; y++) {
2561 for (x = 0; x < STRESS_CPU_DITHER_X; x++) {
2562 uint8_t pixel = pixels[x][y];
2563 uint8_t quant = (pixel < 128) ? 0 : 255;
2564 int32_t error = pixel - quant;
2565
2566 bool xok1 = x < (STRESS_CPU_DITHER_X - 1);
2567 bool xok2 = x > 0;
2568 bool yok1 = y < (STRESS_CPU_DITHER_Y - 1);
2569
2570 if (xok1)
2571 pixels[x + 1][y] +=
2572 (error * 7) >> 4;
2573 if (xok2 && yok1)
2574 pixels[x - 1][y + 1] +=
2575 (error * 3) >> 4;
2576 if (yok1)
2577 pixels[x][y + 1] +=
2578 (error * 5) >> 4;
2579 if (xok1 && yok1)
2580 pixels[x + 1][y + 1] +=
2581 error >> 4;
2582 }
2583 }
2584 }
2585
2586 /*
2587 * stress_cpu_div16
2588 * perform 50000 x 16 bit divisions, these are traditionally
2589 * slow ops
2590 */
stress_cpu_div16(const char * name)2591 static void TARGET_CLONES stress_cpu_div16(const char *name)
2592 {
2593 register uint16_t i, j;
2594 const uint16_t di = 0xdUL;
2595 const uint16_t max = 0xfde8;
2596
2597 (void)name;
2598
2599 for (i = 0, j = 1; i < max; i += di) {
2600 register uint32_t r = i / j;
2601
2602 j = 1 | ((j << 1) ^ j);
2603 stress_uint16_put(r);
2604 }
2605 }
2606
2607 /*
2608 * stress_cpu_div32
2609 * perform 50000 x 32 bit divisions, these are traditionally
2610 * slow ops
2611 */
stress_cpu_div32(const char * name)2612 static void TARGET_CLONES stress_cpu_div32(const char *name)
2613 {
2614 register uint32_t i, j;
2615 const uint32_t di = 0x0014e3dUL;
2616 const uint32_t max = 0xfeff9bd4UL;
2617
2618 (void)name;
2619
2620 for (i = 0, j = 1; i < max; i += di) {
2621 register uint32_t r = i / j;
2622
2623 j = 1 | ((j << 1) ^ j);
2624 stress_uint32_put(r);
2625 }
2626 }
2627
2628 /*
2629 * stress_cpu_div64
2630 * perform 50000 x 64 bit divisions, these are traditionally
2631 * really slow ops
2632 */
stress_cpu_div64(const char * name)2633 static void TARGET_CLONES stress_cpu_div64(const char *name)
2634 {
2635 register uint64_t i, j;
2636 const uint64_t di = 0x000014ced130f7513LL;
2637 const uint64_t dj = 0x000013cba9876543ULL;
2638 const uint64_t max = 0xfe00000000000000ULL;
2639
2640 (void)name;
2641
2642 for (i = 0, j = 0x7fffffffffffULL; i < max; i += di, j -= dj) {
2643 register uint64_t r = i / j;
2644 stress_uint64_put(r);
2645 }
2646 }
2647
/*
 *  stress_cpu_cpuid()
 *	get CPU id info, x86 only: issue a fixed sequence of cpuid
 *	queries 1000 times over, feeding the eax value left by each
 *	query to the uint32_t sink
 *	see https://en.wikipedia.org/wiki/CPUID
 */
#if defined(STRESS_ARCH_X86)
static void TARGET_CLONES stress_cpu_cpuid(const char *name)
{
	/* Values loaded into eax and ecx for each query, in issue order */
	static const struct {
		uint32_t eax;
		uint32_t ecx;
	} queries[] = {
		{ 0,          0 },	/* Highest Function Parameter and Manufacturer ID */
		{ 1,          0 },	/* Processor Info and Feature Bits */
		{ 2,          0 },	/* Cache and TLB Descriptor information */
		{ 3,          0 },	/* Processor Serial Number */
		{ 4,          0 },	/* Intel thread/core and cache topology */
		{ 6,          0 },	/* Thermal and power management */
		{ 6,          0 },	/* Same query as the previous entry, issued a second time */
		{ 7,          0 },	/* Extended Features, ecx must be 0 */
		{ 7,          1 },	/* Extended Features, ecx must be 1 */
		{ 0xb,        0 },	/* Intel thread/core and cache topology */
		{ 0x80000000, 0 },	/* Get highest extended function index */
		{ 0x80000001, 0 },	/* Extended processor info */
		{ 0x80000002, 0 },	/* Processor brand string */
		{ 0x80000003, 0 },	/* Processor brand string */
		{ 0x80000004, 0 },	/* Processor brand string */
		{ 0x80000005, 0 },	/* L1 Cache and TLB Identifiers */
		{ 0x80000006, 0 },	/* Extended L2 Cache Features */
		{ 0x80000007, 0 },	/* Advanced Power Management information */
		{ 0x80000008, 0 },	/* Virtual and Physical address size */
	};
	register int i;

	(void)name;

	for (i = 0; i < 1000; i++) {
		size_t q;

		for (q = 0; q < sizeof(queries) / sizeof(queries[0]); q++) {
			uint32_t eax = queries[q].eax;
			uint32_t ebx = 0;	/* Not required */
			uint32_t ecx = queries[q].ecx;
			uint32_t edx = 0;	/* Not required */

			stress_x86_cpuid(&eax, &ebx, &ecx, &edx);
			stress_uint32_put(eax);
		}
	}
}
#endif
2817
2818 /*
2819 * stress_cpu_union
2820 * perform bit field operations on a union
2821 */
stress_cpu_union(const char * name)2822 static void TARGET_CLONES stress_cpu_union(const char *name)
2823 {
2824 typedef union {
2825 struct {
2826 uint64_t b1:1;
2827 uint64_t b10:10;
2828 uint64_t b2:2;
2829 uint64_t b9:9;
2830 uint64_t b3:3;
2831 uint64_t b8:8;
2832 uint64_t b4:4;
2833 uint64_t b7:7;
2834 uint64_t b5:5;
2835 uint64_t b6:6;
2836 } bits64;
2837 uint64_t u64:64;
2838 union {
2839 uint8_t b1:1;
2840 uint8_t b7:7;
2841 uint8_t b8:8;
2842 } bits8;
2843 struct {
2844 uint16_t b15:15;
2845 uint16_t b1:1;
2846 } bits16;
2847 struct {
2848 uint32_t b10:10;
2849 uint32_t b20:20;
2850 #if defined(__TINYC__)
2851 uint32_t f:1; /* cppcheck-suppress unusedStructMember */
2852 #else
2853 uint32_t :1; /* cppcheck-suppress unusedStructMember */
2854 #endif
2855 uint32_t b1:1;
2856 } bits32;
2857 uint32_t u32:30;
2858 } stress_u_t;
2859
2860 static stress_u_t u;
2861 size_t i;
2862
2863 (void)name;
2864 for (i = 0; i < 1000; i++) {
2865 u.bits64.b1 ^= 1;
2866 u.bits64.b2--;
2867 u.bits32.b10 ^= ~0;
2868 u.bits64.b3++;
2869 u.bits16.b1--;
2870 u.bits8.b1++;
2871 u.bits64.b4 *= 2;
2872 u.bits32.b20 += 3;
2873 u.u64 += 0x1037fc2ae21ef829ULL;
2874 u.bits64.b6--;
2875 u.bits8.b7 *= 3;
2876 u.bits64.b5 += (u.bits64.b4 << 1);
2877 u.bits32.b1 ^= 1;
2878 u.bits64.b7++;
2879 u.bits8.b8 ^= 0xaa;
2880 u.bits64.b8--;
2881 u.bits16.b15 ^= 0xbeef;
2882 u.bits64.b9++;
2883 u.bits64.b10 *= 5;
2884 u.u32 += 1;
2885 }
2886 }
2887
/*
 *  queens_try()
 *	count the ways of completing a partially filled board.
 *	Solution from http://www.cl.cam.ac.uk/~mr10/backtrk.pdf
 *	see section 2.1
 *
 *	left_diag, cols and right_diag are bit masks of the squares
 *	in the current row that are attacked along each direction,
 *	all is the mask with one bit set per column of the board.
 *	Returns the number of complete placements reachable.
 */
static uint32_t queens_try(
	uint32_t left_diag,
	uint32_t cols,
	uint32_t right_diag,
	uint32_t all)
{
	uint32_t count = 0;
	uint32_t candidates;

	/* Visit every unattacked column of this row, lowest bit first */
	for (candidates = all & ~(left_diag | cols | right_diag);
	     candidates;
	     candidates &= (candidates - 1)) {
		/* Isolate the lowest set bit (two's complement trick) */
		const uint32_t bit = candidates & (~candidates + 1);
		const uint32_t placed = cols | bit;

		if (placed == all) {
			/* Every column holds a queen, a full solution */
			count++;
		} else {
			/* Diagonal attacks shift one column per row */
			count += queens_try((left_diag | bit) << 1,
				placed, (right_diag | bit) >> 1, all);
		}
	}
	return count;
}
2913
2914
2915 /*
2916 * stress_cpu_queens
2917 * solve the queens problem for sizes 1..11
2918 */
stress_cpu_queens(const char * name)2919 static void stress_cpu_queens(const char *name)
2920 {
2921 uint32_t all, n;
2922
2923 static const uint32_t queens_solutions[] = {
2924 0, 1, 0, 0, 2, 10, 4, 40, 92, 352, 724, 2680, 14200
2925 };
2926
2927 for (all = 1, n = 1; n < 12; n++) {
2928 const uint32_t solutions = queens_try(0, 0, 0, all);
2929
2930 if ((g_opt_flags & OPT_FLAGS_VERIFY) &&
2931 (solutions != queens_solutions[n]))
2932 pr_fail("%s: queens solution error detected "
2933 "on board size %" PRIu32 "\n",
2934 name, n);
2935 all = (all + all) + 1;
2936 }
2937 }
2938
2939 /*
2940 * stress_cpu_factorial
2941 * find factorials from 1..150 using
2942 * Stirling's and Ramanujan's Approximations.
2943 */
stress_cpu_factorial(const char * name)2944 static void stress_cpu_factorial(const char *name)
2945 {
2946 int n;
2947 long double f = 1.0L;
2948 const long double precision = 1.0e-6L;
2949 const long double sqrt_pi = shim_sqrtl(PI);
2950
2951 for (n = 1; n < 150; n++) {
2952 long double np1 = (long double)(n + 1);
2953 long double fact = roundl(shim_expl(shim_lgammal(np1)));
2954 long double dn;
2955
2956 f *= (long double)n;
2957
2958 /* Stirling */
2959 if ((g_opt_flags & OPT_FLAGS_VERIFY) &&
2960 ((f - fact) / fact > precision)) {
2961 pr_fail("%s: Stirling's approximation of factorial(%d) out of range\n",
2962 name, n);
2963 }
2964
2965 /* Ramanujan */
2966 dn = (long double)n;
2967 fact = sqrt_pi * shim_powl((dn / (long double)M_E), dn);
2968 fact *= shim_powl((((((((8 * dn) + 4)) * dn) + 1) * dn) + 1.0L/30.0L), (1.0L/6.0L));
2969 if ((g_opt_flags & OPT_FLAGS_VERIFY) &&
2970 ((f - fact) / fact > precision)) {
2971 pr_fail("%s: Ramanujan's approximation of factorial(%d) out of range\n",
2972 name, n);
2973 }
2974 }
2975 }
2976
2977 /*
2978 * stress_cpu_stats
2979 * Exercise some standard stats computations on random data
2980 */
stress_cpu_stats(const char * name)2981 static void stress_cpu_stats(const char *name)
2982 {
2983 size_t i;
2984 double data[STATS_MAX];
2985 double min, max, am = 0.0, gm, hm = 0.0, stddev = 0.0;
2986 int64_t expon = 0;
2987 double mant = 1.0;
2988 const double inverse_n = 1.0 / (double)STATS_MAX;
2989
2990 for (i = 0; i < STATS_MAX; i++)
2991 data[i] = ((double)(stress_mwc32() + 1)) / 4294967296.0;
2992
2993 min = max = data[0];
2994
2995 for (i = 0; i < STATS_MAX; i++) {
2996 double d = data[i];
2997 double f;
2998 int e;
2999
3000 f = frexp(d, &e);
3001 mant *= f;
3002 expon += e;
3003
3004 if (min > d)
3005 min = d;
3006 if (max < d)
3007 max = d;
3008
3009 am += d;
3010 hm += 1 / d;
3011 }
3012 /* Arithmetic mean (average) */
3013 am = am / STATS_MAX;
3014 /* Geometric mean */
3015 gm = pow(mant, inverse_n) *
3016 pow(2.0, (double)expon * inverse_n);
3017 /* Harmonic mean */
3018 hm = STATS_MAX / hm;
3019
3020 for (i = 0; i < STATS_MAX; i++) {
3021 double d = data[i] - am;
3022 stddev += (d * d);
3023 }
3024 /* Standard Deviation */
3025 stddev = shim_sqrt(stddev);
3026
3027 stress_double_put(am);
3028 stress_double_put(gm);
3029 stress_double_put(hm);
3030 stress_double_put(stddev);
3031
3032 if (min > hm)
3033 pr_fail("%s: stats: minimum %f > harmonic mean %f\n",
3034 name, min, hm);
3035 if (hm > gm)
3036 pr_fail("%s: stats: harmonic mean %f > geometric mean %f\n",
3037 name, hm, gm);
3038 if (gm > am)
3039 pr_fail("%s: stats: geometric mean %f > arithmetic mean %f\n",
3040 name, gm, am);
3041 if (am > max)
3042 pr_fail("%s: stats: arithmetic mean %f > maximum %f\n",
3043 name, am, max);
3044 }
3045
3046 /*
3047 * stress_cpu_all()
3048 * iterate over all cpu stressors
3049 */
stress_cpu_all(const char * name)3050 static HOT OPTIMIZE3 void stress_cpu_all(const char *name)
3051 {
3052 static int i = 1; /* Skip over stress_cpu_all */
3053
3054 cpu_methods[i++].func(name);
3055 if (!cpu_methods[i].func)
3056 i = 1;
3057 }
3058
3059 /*
3060 * Table of cpu stress methods
3061 */
3062 static const stress_cpu_method_info_t cpu_methods[] = {
3063 { "all", stress_cpu_all }, /* Special "all test */
3064
3065 { "ackermann", stress_cpu_ackermann },
3066 { "apery", stress_cpu_apery },
3067 { "bitops", stress_cpu_bitops },
3068 { "callfunc", stress_cpu_callfunc },
3069 #if defined(HAVE_COMPLEX_H) && \
3070 defined(HAVE_COMPLEX) && \
3071 defined(__STDC_IEC_559_COMPLEX__) &&\
3072 !defined(__UCLIBC__)
3073 { "cdouble", stress_cpu_complex_double },
3074 { "cfloat", stress_cpu_complex_float },
3075 { "clongdouble", stress_cpu_complex_long_double },
3076 #endif
3077 { "collatz", stress_cpu_collatz },
3078 { "correlate", stress_cpu_correlate },
3079 #if defined(STRESS_ARCH_X86)
3080 { "cpuid", stress_cpu_cpuid },
3081 #endif
3082 { "crc16", stress_cpu_crc16 },
3083 #if defined(HAVE_FLOAT_DECIMAL32) && \
3084 !defined(__clang__)
3085 { "decimal32", stress_cpu_decimal32 },
3086 #endif
3087 #if defined(HAVE_FLOAT_DECIMAL64) && \
3088 !defined(__clang__)
3089 { "decimal64", stress_cpu_decimal64 },
3090 #endif
3091 #if defined(HAVE_FLOAT_DECIMAL128) && \
3092 !defined(__clang__)
3093 { "decimal128", stress_cpu_decimal128 },
3094 #endif
3095 { "dither", stress_cpu_dither },
3096 { "div16", stress_cpu_div16 },
3097 { "div32", stress_cpu_div32 },
3098 { "div64", stress_cpu_div64 },
3099 { "djb2a", stress_cpu_djb2a },
3100 { "double", stress_cpu_double },
3101 { "euler", stress_cpu_euler },
3102 { "explog", stress_cpu_explog },
3103 { "factorial", stress_cpu_factorial },
3104 { "fibonacci", stress_cpu_fibonacci },
3105 #if defined(HAVE_COMPLEX_H) && \
3106 defined(HAVE_COMPLEX) && \
3107 defined(__STDC_IEC_559_COMPLEX__) &&\
3108 !defined(__UCLIBC__)
3109 { "fft", stress_cpu_fft },
3110 #endif
3111 { "fletcher16", stress_cpu_fletcher16 },
3112 { "float", stress_cpu_float },
3113 #if defined(HAVE_FLOAT16) && \
3114 !defined(__clang__)
3115 { "float16", stress_cpu_float16 },
3116 #endif
3117 #if defined(HAVE_FLOAT32) && \
3118 !defined(__clang__)
3119 { "float32", stress_cpu_float32 },
3120 #endif
3121 #if defined(HAVE_FLOAT64) && \
3122 !defined(__clang__)
3123 { "float64", stress_cpu_float64 },
3124 #endif
3125 #if defined(HAVE_FLOAT80) && \
3126 !defined(__clang__)
3127 { "float80", stress_cpu_float80 },
3128 #endif
3129 #if defined(HAVE_FLOAT128) && \
3130 !defined(__clang__)
3131 { "float128", stress_cpu_float128 },
3132 #endif
3133 { "floatconversion", stress_cpu_floatconversion },
3134 { "fnv1a", stress_cpu_fnv1a },
3135 { "gamma", stress_cpu_gamma },
3136 { "gcd", stress_cpu_gcd },
3137 { "gray", stress_cpu_gray },
3138 { "hamming", stress_cpu_hamming },
3139 { "hanoi", stress_cpu_hanoi },
3140 { "hyperbolic", stress_cpu_hyperbolic },
3141 { "idct", stress_cpu_idct },
3142 #if defined(HAVE_INT128_T)
3143 { "int128", stress_cpu_int128 },
3144 #endif
3145 { "int64", stress_cpu_int64 },
3146 { "int32", stress_cpu_int32 },
3147 { "int16", stress_cpu_int16 },
3148 { "int8", stress_cpu_int8 },
3149 #if defined(HAVE_INT128_T)
3150 { "int128float", stress_cpu_int128_float },
3151 { "int128double", stress_cpu_int128_double },
3152 { "int128longdouble", stress_cpu_int128_longdouble },
3153 #if defined(HAVE_FLOAT_DECIMAL32) && \
3154 !defined(__clang__)
3155 { "int128decimal32", stress_cpu_int128_decimal32 },
3156 #endif
3157 #if defined(HAVE_FLOAT_DECIMAL64) && \
3158 !defined(__clang__)
3159 { "int128decimal64", stress_cpu_int128_decimal64 },
3160 #endif
3161 #if defined(HAVE_FLOAT_DECIMAL128) && \
3162 !defined(__clang__)
3163 { "int128decimal128", stress_cpu_int128_decimal128 },
3164 #endif
3165 #endif
3166 { "int64float", stress_cpu_int64_float },
3167 { "int64double", stress_cpu_int64_double },
3168 { "int64longdouble", stress_cpu_int64_longdouble },
3169 { "int32float", stress_cpu_int32_float },
3170 { "int32double", stress_cpu_int32_double },
3171 { "int32longdouble", stress_cpu_int32_longdouble },
3172 { "intconversion", stress_cpu_intconversion },
3173 { "ipv4checksum", stress_cpu_ipv4checksum },
3174 { "jenkin", stress_cpu_jenkin },
3175 { "jmp", stress_cpu_jmp },
3176 { "lfsr32", stress_cpu_lfsr32 },
3177 { "ln2", stress_cpu_ln2 },
3178 { "longdouble", stress_cpu_longdouble },
3179 { "loop", stress_cpu_loop },
3180 { "matrixprod", stress_cpu_matrix_prod },
3181 { "murmur3_32", stress_cpu_murmur3_32 },
3182 { "nhash", stress_cpu_nhash },
3183 { "nsqrt", stress_cpu_nsqrt },
3184 { "omega", stress_cpu_omega },
3185 { "parity", stress_cpu_parity },
3186 { "phi", stress_cpu_phi },
3187 { "pi", stress_cpu_pi },
3188 { "pjw", stress_cpu_pjw },
3189 { "prime", stress_cpu_prime },
3190 { "psi", stress_cpu_psi },
3191 { "queens", stress_cpu_queens },
3192 { "rand", stress_cpu_rand },
3193 { "rand48", stress_cpu_rand48 },
3194 { "rgb", stress_cpu_rgb },
3195 { "sdbm", stress_cpu_sdbm },
3196 { "sieve", stress_cpu_sieve },
3197 { "stats", stress_cpu_stats },
3198 { "sqrt", stress_cpu_sqrt },
3199 { "trig", stress_cpu_trig },
3200 { "union", stress_cpu_union },
3201 #if defined(HAVE_COMPLEX_H) && \
3202 defined(HAVE_COMPLEX) && \
3203 defined(__STDC_IEC_559_COMPLEX__) &&\
3204 !defined(__UCLIBC__)
3205 { "zeta", stress_cpu_zeta },
3206 #endif
3207 { NULL, NULL }
3208 };
3209
3210 /*
3211 * stress_set_cpu_method()
3212 * set the default cpu stress method
3213 */
stress_set_cpu_method(const char * name)3214 static int stress_set_cpu_method(const char *name)
3215 {
3216 stress_cpu_method_info_t const *info;
3217
3218 for (info = cpu_methods; info->func; info++) {
3219 if (!strcmp(info->name, name)) {
3220 stress_set_setting("cpu-method", TYPE_ID_UINTPTR_T, &info);
3221 return 0;
3222 }
3223 }
3224
3225 (void)fprintf(stderr, "cpu-method must be one of:");
3226 for (info = cpu_methods; info->func; info++) {
3227 (void)fprintf(stderr, " %s", info->name);
3228 }
3229 (void)fprintf(stderr, "\n");
3230
3231 return -1;
3232 }
3233
/*
 *  stress_per_cpu_time()
 *	try to get accurate CPU time from the CPUTIME clock,
 *	or fall back to wall clock time if not possible.
 *	Returns the time in seconds.
 */
static double stress_per_cpu_time(void)
{
#if defined(CLOCK_PROCESS_CPUTIME_ID)
	/* Cleared on the first failure so the clock is not retried */
	static bool use_clock_gettime = true;

	/*
	 *  Where possible try to get time used on the CPU
	 *  rather than wall clock time to get more accurate
	 *  CPU consumption measurements
	 */
	if (use_clock_gettime) {
		struct timespec ts;

		if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) != 0) {
			use_clock_gettime = false;
		} else {
			const double secs = (double)ts.tv_sec;
			const double nsecs = (double)ts.tv_nsec;

			return secs + nsecs / (double)STRESS_NANOSECOND;
		}
	}
#endif
	/*
	 *  Can't get CPU clock time, fall back to wall clock time
	 */
	return stress_time_now();
}
3263
3264 /*
3265 * stress_cpu()
3266 * stress CPU by doing floating point math ops
3267 */
stress_cpu(const stress_args_t * args)3268 static int HOT OPTIMIZE3 stress_cpu(const stress_args_t *args)
3269 {
3270 double bias;
3271 const stress_cpu_method_info_t *cpu_method = &cpu_methods[0];
3272 stress_cpu_func func;
3273 int32_t cpu_load = 100;
3274 int32_t cpu_load_slice = -64;
3275
3276 (void)stress_get_setting("cpu-load", &cpu_load);
3277 (void)stress_get_setting("cpu-load-slice", &cpu_load_slice);
3278 (void)stress_get_setting("cpu-method", &cpu_method);
3279
3280 func = cpu_method->func;
3281
3282 pr_dbg("%s using method '%s'\n", args->name, cpu_method->name);
3283
3284 /*
3285 * It is unlikely, but somebody may request to do a zero
3286 * load stress test(!)
3287 */
3288 if (cpu_load == 0) {
3289 (void)sleep((unsigned int)g_opt_timeout);
3290 return EXIT_SUCCESS;
3291 }
3292
3293 stress_set_proc_state(args->name, STRESS_STATE_RUN);
3294
3295 /*
3296 * Normal use case, 100% load, simple spinning on CPU
3297 */
3298 if (cpu_load == 100) {
3299 do {
3300 (void)func(args->name);
3301 inc_counter(args);
3302 } while (keep_stressing(args));
3303 return EXIT_SUCCESS;
3304 }
3305
3306 /*
3307 * More complex percentage CPU utilisation. This is
3308 * not intended to be 100% accurate timing, it is good
3309 * enough for most purposes.
3310 */
3311 bias = 0.0;
3312 do {
3313 double delay, t1, t2;
3314 struct timeval tv;
3315
3316 t1 = stress_per_cpu_time();
3317 if (cpu_load_slice < 0) {
3318 /* < 0 specifies number of iterations to do per slice */
3319 int j;
3320
3321 for (j = 0; j < -cpu_load_slice; j++) {
3322 (void)func(args->name);
3323 if (!keep_stressing_flag())
3324 break;
3325 inc_counter(args);
3326 }
3327 t2 = stress_per_cpu_time();
3328 } else if (cpu_load_slice == 0) {
3329 /* == 0, random time slices */
3330 const uint16_t r = stress_mwc16();
3331 double slice_end = t1 + ((double)r / 131072.0);
3332 do {
3333 (void)func(args->name);
3334 t2 = stress_per_cpu_time();
3335 if (!keep_stressing_flag())
3336 break;
3337 inc_counter(args);
3338 } while (t2 < slice_end);
3339 } else {
3340 /* > 0, time slice in milliseconds */
3341 const double slice_end = t1 + ((double)cpu_load_slice / 1000.0);
3342
3343 do {
3344 (void)func(args->name);
3345 t2 = stress_per_cpu_time();
3346 if (!keep_stressing_flag())
3347 break;
3348 inc_counter(args);
3349 } while (t2 < slice_end);
3350 }
3351
3352 /* Must not calculate this with zero % load */
3353 delay = (((100 - cpu_load) * (t2 - t1)) / (double)cpu_load);
3354 delay -= bias;
3355
3356 /* We may have clock warping so don't sleep for -ve delays */
3357 if (delay < 0.0) {
3358 bias = 0.0;
3359 } else {
3360 /*
3361 * We need to sleep for a small amount of
3362 * time, measurements need to be based on
3363 * wall clock time and NOT on cpu time used.
3364 */
3365 double t3;
3366
3367 t2 = stress_time_now();
3368
3369 tv.tv_sec = (time_t)delay;
3370 tv.tv_usec = (long)((delay - (double)tv.tv_sec) * 1000000.0);
3371 (void)select(0, NULL, NULL, NULL, &tv);
3372 t3 = stress_time_now();
3373 /* Bias takes account of the time to do the delay */
3374 bias = (t3 - t2) - delay;
3375 }
3376 } while (keep_stressing(args));
3377
3378 if (stress_is_affinity_set() && (args->instance == 0)) {
3379 pr_inf("%s: CPU affinity probably set, this can affect CPU loading\n",
3380 args->name);
3381 }
3382
3383 stress_set_proc_state(args->name, STRESS_STATE_DEINIT);
3384
3385 return EXIT_SUCCESS;
3386 }
3387
/*
 *  stress_cpu_set_default()
 *	select the "all" cpu method; the result of the lookup is
 *	deliberately ignored
 */
static void stress_cpu_set_default(void)
{
	(void)stress_set_cpu_method("all");
}
3392
/*
 *  Pairs each cpu stressor option identifier with the function
 *  that sets it; the { 0, NULL } entry is presumably the end of
 *  table marker (stress_opt_set_func_t is declared elsewhere)
 */
static const stress_opt_set_func_t opt_set_funcs[] = {
	{ OPT_cpu_load,		stress_set_cpu_load },
	{ OPT_cpu_load_slice,	stress_set_cpu_load_slice },
	{ OPT_cpu_method,	stress_set_cpu_method },
	{ 0,			NULL },
};
3399
/*
 *  Externally visible descriptor of the cpu stressor: its entry
 *  point, the function applying its default settings, its class,
 *  its option setters and its help information
 */
stressor_info_t stress_cpu_info = {
	.stressor = stress_cpu,
	.set_default = stress_cpu_set_default,
	.class = CLASS_CPU,
	.opt_set_funcs = opt_set_funcs,
	.help = help
};
3407