1 /*
2 * Copyright (C) 2013-2021 Canonical, Ltd.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * This code is a complete clean re-write of the stress tool by
19 * Colin Ian King <colin.king@canonical.com> and attempts to be
20 * backwardly compatible with the stress tool by Amos Waterland
21 * <apw@rossby.metr.ou.edu> but has more stress tests and more
22 * functionality.
23 *
24 */
25 #include "stress-ng.h"
26
27 static const stress_help_t help[] = {
28 { NULL, "vecmath N", "start N workers performing vector math ops" },
29 { NULL, "vecmath-ops N", "stop after N vector math bogo operations" },
30 { NULL, NULL, NULL }
31 };
32
/*
 *  The stressor is only built with clang 5.0 or newer; older
 *  releases (clang 4.0 in particular) appear to spend forever
 *  optimizing this code and the build never completes, so the
 *  vector math support is dropped for them.
 */
#if defined(__clang__) && defined(__clang_major__) && (__clang_major__ < 5)
#undef HAVE_VECMATH
#endif
44
/*
 *  gcc 5.x or earlier breaks on 128 bit vector maths on
 *  PPC64 for some reason with some flavours of the toolchain
 *  so disable this test for now.
 *
 *  clang also defines __GNUC__ (with a value of 4 for gcc
 *  compatibility), so it is explicitly excluded here; without
 *  that check every clang build on PPC64 would be treated as
 *  an old gcc and silently lose the stressor.
 */
#if defined(STRESS_ARCH_PPC64) &&	\
    defined(__GNUC__) &&		\
    !defined(__clang__) &&		\
    (__GNUC__ < 6)
#undef HAVE_VECMATH
#endif
55
56 #if defined(HAVE_VECMATH)
57
/*
 *  16 byte (128 bit) wide vector types, one per lane width:
 *  16 x 8 bit, 8 x 16 bit, 4 x 32 bit, 2 x 64 bit lanes and,
 *  when 128 bit integers are available, 1 x 128 bit lane
 */
typedef int8_t		stress_vint8_t	 __attribute__((vector_size(16)));
typedef int16_t		stress_vint16_t	 __attribute__((vector_size(16)));
typedef int32_t		stress_vint32_t	 __attribute__((vector_size(16)));
typedef int64_t		stress_vint64_t	 __attribute__((vector_size(16)));
#if defined(HAVE_INT128_T)
typedef __uint128_t	stress_vint128_t __attribute__((vector_size(16)));
#endif
65
/*
 *  Convert various sized n * 8 bit tuples into n * 8 bit integers.
 *
 *  The first argument becomes the most significant byte and the last
 *  argument the least significant byte of the result. Every argument
 *  is parenthesised so that expressions (e.g. 1 | 2) can be passed
 *  safely, and all shifting is performed on unsigned types so no bit
 *  is ever shifted into a sign bit; only the final packed value is
 *  converted to the signed result type.
 */
#define H8(a0)						\
	((int8_t)((uint8_t)(a0)))
#define H16(a0, a1)					\
	((int16_t)(((uint16_t)(a0) << 8) |		\
		   ((uint16_t)(a1) << 0)))
#define H32(a0, a1, a2, a3)				\
	((int32_t)(((uint32_t)(a0) << 24) |		\
		   ((uint32_t)(a1) << 16) |		\
		   ((uint32_t)(a2) <<  8) |		\
		   ((uint32_t)(a3) <<  0)))
#define H64(a0, a1, a2, a3, a4, a5, a6, a7)		\
	((int64_t)(((uint64_t)(a0) << 56) |		\
		   ((uint64_t)(a1) << 48) |		\
		   ((uint64_t)(a2) << 40) |		\
		   ((uint64_t)(a3) << 32) |		\
		   ((uint64_t)(a4) << 24) |		\
		   ((uint64_t)(a5) << 16) |		\
		   ((uint64_t)(a6) <<  8) |		\
		   ((uint64_t)(a7) <<  0)))

#if defined(HAVE_INT128_T)
/* 128 bit variant, shifts done in __uint128_t like the narrower helpers */
#define H128(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, ac, ad, ae, af)	\
	((__int128_t)(((__uint128_t)(a0) << 120) |	\
		      ((__uint128_t)(a1) << 112) |	\
		      ((__uint128_t)(a2) << 104) |	\
		      ((__uint128_t)(a3) <<  96) |	\
		      ((__uint128_t)(a4) <<  88) |	\
		      ((__uint128_t)(a5) <<  80) |	\
		      ((__uint128_t)(a6) <<  72) |	\
		      ((__uint128_t)(a7) <<  64) |	\
		      ((__uint128_t)(a8) <<  56) |	\
		      ((__uint128_t)(a9) <<  48) |	\
		      ((__uint128_t)(aa) <<  40) |	\
		      ((__uint128_t)(ab) <<  32) |	\
		      ((__uint128_t)(ac) <<  24) |	\
		      ((__uint128_t)(ad) <<  16) |	\
		      ((__uint128_t)(ae) <<   8) |	\
		      ((__uint128_t)(af) <<   0)))
#endif
109
/*
 *  128 bit constants. Each one is spelt out as 16 byte values that
 *  are passed to the packing macro PACK, which decides how the bytes
 *  are grouped into vector lanes.
 */

/* every byte zero */
#define A(PACK)		PACK(0x00, 0x00, 0x00, 0x00,	\
			     0x00, 0x00, 0x00, 0x00,	\
			     0x00, 0x00, 0x00, 0x00,	\
			     0x00, 0x00, 0x00, 0x00)

/* mixed bit pattern */
#define B(PACK)		PACK(0x01, 0x23, 0x45, 0x67,	\
			     0x89, 0xab, 0xcd, 0xef,	\
			     0x0f, 0x1e, 0x2d, 0x3c,	\
			     0x4b, 0x5a, 0x69, 0x78)

/* small values in the range 1..3 */
#define C(PACK)		PACK(0x01, 0x02, 0x03, 0x02,	\
			     0x01, 0x02, 0x03, 0x02,	\
			     0x03, 0x02, 0x01, 0x02,	\
			     0x03, 0x02, 0x01, 0x02)

/* small values in the range 1..2 */
#define S(PACK)		PACK(0x01, 0x01, 0x01, 0x01,	\
			     0x02, 0x02, 0x02, 0x02,	\
			     0x01, 0x01, 0x02, 0x02,	\
			     0x01, 0x01, 0x02, 0x02)

/* every byte 23 (0x17) */
#define V23(PACK)	PACK(0x17, 0x17, 0x17, 0x17,	\
			     0x17, 0x17, 0x17, 0x17,	\
			     0x17, 0x17, 0x17, 0x17,	\
			     0x17, 0x17, 0x17, 0x17)

/* every byte 3 */
#define V3(PACK)	PACK(0x03, 0x03, 0x03, 0x03,	\
			     0x03, 0x03, 0x03, 0x03,	\
			     0x03, 0x03, 0x03, 0x03,	\
			     0x03, 0x03, 0x03, 0x03)
130
/*
 *  Lane initialisers: each macro takes 16 x 8 bit values and expands
 *  to the comma separated initialiser list for one 128 bit vector,
 *  grouping the bytes with the H8/H16/H32/H64/H128 packing helpers.
 */

/* 16 lanes of 8 bits */
#define INT16x8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf)	\
	H8(x0), H8(x1), H8(x2), H8(x3),		\
	H8(x4), H8(x5), H8(x6), H8(x7),		\
	H8(x8), H8(x9), H8(xa), H8(xb),		\
	H8(xc), H8(xd), H8(xe), H8(xf)

/* 8 lanes of 16 bits */
#define INT8x16(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf)	\
	H16(x0, x1), H16(x2, x3),		\
	H16(x4, x5), H16(x6, x7),		\
	H16(x8, x9), H16(xa, xb),		\
	H16(xc, xd), H16(xe, xf)

/* 4 lanes of 32 bits */
#define INT4x32(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf)	\
	H32(x0, x1, x2, x3),			\
	H32(x4, x5, x6, x7),			\
	H32(x8, x9, xa, xb),			\
	H32(xc, xd, xe, xf)

/* 2 lanes of 64 bits */
#define INT2x64(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf)	\
	H64(x0, x1, x2, x3, x4, x5, x6, x7),	\
	H64(x8, x9, xa, xb, xc, xd, xe, xf)

#if defined(HAVE_INT128_T)
/* 1 lane of 128 bits */
#define INT1x128(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf)	\
	H128(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf)
#endif
154
/*
 *  One round of mixed integer operations applied to a set of
 *  operands of the same type:
 *    va   - accumulator, updated by add/or/sub/and-not/mul/not/xor/shift/mod
 *    vb   - shifted right, has vc added, then exchanged with vc
 *    vc   - multiplier/xor operand, divided by vdiv, then exchanged with vb
 *    vs   - second multiplier (read only)
 *    vmod - modulus applied to va (read only)
 *    vdiv - divisor applied to vc (read only)
 *  The final three xor steps swap the contents of vb and vc.
 */
#define OPS(va, vb, vc, vs, vmod, vdiv)	\
do {					\
	va += vb;			\
	va |= vb;			\
	va -= vb;			\
	va &= ~vb;			\
	va *= vc;			\
	va = ~va;			\
	va *= vs;			\
	va ^= vc;			\
	va <<= 1;			\
	vb >>= 1;			\
	vb += vc;			\
	va %= vmod;			\
	vc /= vdiv;			\
	vb ^= vc;			\
	vc ^= vb;			\
	vb ^= vc;			\
} while (0)
177
178 /*
179 * stress_vecmath()
180 * stress GCC vector maths
181 */
182 #if defined(STRESS_ARCH_PPC64)
stress_vecmath(const stress_args_t * args)183 static int HOT stress_vecmath(const stress_args_t *args)
184 #else
185 static int HOT TARGET_CLONES stress_vecmath(const stress_args_t *args)
186 #endif
187 {
188 stress_vint8_t a8 = { A(INT16x8) };
189 stress_vint8_t b8 = { B(INT16x8) };
190 stress_vint8_t c8 = { C(INT16x8) };
191 stress_vint8_t s8 = { S(INT16x8) };
192 const stress_vint8_t v23_8 = { V23(INT16x8) };
193 const stress_vint8_t v3_8 = { V3(INT16x8) };
194
195 stress_vint16_t a16 = { A(INT8x16) };
196 stress_vint16_t b16 = { B(INT8x16) };
197 stress_vint16_t c16 = { C(INT8x16) };
198 stress_vint16_t s16 = { S(INT8x16) };
199 const stress_vint16_t v23_16 = { V23(INT8x16) };
200 const stress_vint16_t v3_16 = { V3(INT8x16) };
201
202 stress_vint32_t a32 = { A(INT4x32) };
203 stress_vint32_t b32 = { B(INT4x32) };
204 stress_vint32_t c32 = { C(INT4x32) };
205 stress_vint32_t s32 = { S(INT4x32) };
206 const stress_vint32_t v23_32 = { V23(INT4x32) };
207 const stress_vint32_t v3_32 = { V3(INT4x32) };
208
209 stress_vint64_t a64 = { A(INT2x64) };
210 stress_vint64_t b64 = { B(INT2x64) };
211 stress_vint64_t c64 = { C(INT2x64) };
212 stress_vint64_t s64 = { S(INT2x64) };
213 const stress_vint64_t v23_64 = { V23(INT2x64) };
214 const stress_vint64_t v3_64 = { V3(INT2x64) };
215
216 #if defined(HAVE_INT128_T)
217 stress_vint128_t a128 = { A(INT1x128) };
218 stress_vint128_t b128 = { B(INT1x128) };
219 stress_vint128_t c128 = { C(INT1x128) };
220 stress_vint128_t s128 = { S(INT1x128) };
221 const stress_vint128_t v23_128 = { V23(INT1x128) };
222 const stress_vint128_t v3_128 = { V3(INT1x128) };
223 #endif
224
225 stress_set_proc_state(args->name, STRESS_STATE_RUN);
226
227 do {
228 int i;
229 for (i = 1000; i; i--) {
230 /* Good mix of vector ops */
231 OPS(a8, b8, c8, s8, v23_8, v3_8);
232 OPS(a16, b16, c16, s16, v23_16, v3_16);
233 OPS(a32, b32, c32, s32, v23_32, v3_32);
234 OPS(a64, b64, c64, s64, v23_64, v3_64);
235 #if defined(HAVE_INT128_T)
236 OPS(a128, b128, c128, s128, v23_128, v3_128);
237 #endif
238
239 OPS(a32, b32, c32, s32, v23_32, v3_32);
240 OPS(a16, b16, c16, s16, v23_16, v3_16);
241 #if defined(HAVE_INT128_T)
242 OPS(a128, b128, c128, s128, v23_128, v3_128);
243 #endif
244 OPS(a8, b8, c8, s8, v23_8, v3_8);
245 OPS(a64, b64, c64, s64, v23_64, v3_64);
246
247 OPS(a8, b8, c8, s8, v23_8, v3_8);
248 OPS(a8, b8, c8, s8, v23_8, v3_8);
249 OPS(a8, b8, c8, s8, v23_8, v3_8);
250 OPS(a8, b8, c8, s8, v23_8, v3_8);
251
252 OPS(a16, b16, c16, s16, v23_16, v3_16);
253 OPS(a16, b16, c16, s16, v23_16, v3_16);
254 OPS(a16, b16, c16, s16, v23_16, v3_16);
255 OPS(a16, b16, c16, s16, v23_16, v3_16);
256
257 OPS(a32, b32, c32, s32, v23_32, v3_32);
258 OPS(a32, b32, c32, s32, v23_32, v3_32);
259 OPS(a32, b32, c32, s32, v23_32, v3_32);
260 OPS(a32, b32, c32, s32, v23_32, v3_32);
261
262 OPS(a64, b64, c64, s64, v23_64, v3_64);
263 OPS(a64, b64, c64, s64, v23_64, v3_64);
264 OPS(a64, b64, c64, s64, v23_64, v3_64);
265 OPS(a64, b64, c64, s64, v23_64, v3_64);
266 #if defined(HAVE_INT128_T)
267 OPS(a128, b128, c128, s128, v23_128, v3_128);
268 OPS(a128, b128, c128, s128, v23_128, v3_128);
269 OPS(a128, b128, c128, s128, v23_128, v3_128);
270 OPS(a128, b128, c128, s128, v23_128, v3_128);
271 #endif
272 }
273 inc_counter(args);
274 } while (keep_stressing(args));
275
276 /* Forces the compiler to actually compute the terms */
277 stress_uint8_put((uint8_t)(a8[0] ^ a8[1] ^ a8[2] ^ a8[3] ^
278 a8[4] ^ a8[5] ^ a8[6] ^ a8[7] ^
279 a8[8] ^ a8[9] ^ a8[10] ^ a8[11] ^
280 a8[12] ^ a8[13] ^ a8[14] ^ a8[15]));
281 stress_uint16_put((uint16_t)(a16[0] ^ a16[1] ^ a16[2] ^ a16[3] ^
282 a16[4] ^ a16[5] ^ a16[6] ^ a16[7]));
283 stress_uint32_put((uint32_t)(a32[0] ^ a32[1] ^ a32[2] ^ a32[3]));
284 stress_uint64_put((uint64_t)(a64[0] ^ a64[1]));
285
286 #if defined(HAVE_INT128_T)
287 stress_uint128_put(a128[0]);
288 #endif
289 stress_set_proc_state(args->name, STRESS_STATE_DEINIT);
290
291 return EXIT_SUCCESS;
292 }
293
294 stressor_info_t stress_vecmath_info = {
295 .stressor = stress_vecmath,
296 .class = CLASS_CPU | CLASS_CPU_CACHE,
297 .help = help
298 };
299 #else
300 stressor_info_t stress_vecmath_info = {
301 .stressor = stress_not_implemented,
302 .class = CLASS_CPU | CLASS_CPU_CACHE,
303 .help = help
304 };
305 #endif
306