1 /*
2  * Copyright (C) 2013-2021 Canonical, Ltd.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * This code is a complete clean re-write of the stress tool by
19  * Colin Ian King <colin.king@canonical.com> and attempts to be
20  * backwardly compatible with the stress tool by Amos Waterland
21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
22  * functionality.
23  *
24  */
25 #include "stress-ng.h"
26 
27 static const stress_help_t help[] = {
28 	{ NULL,	"vecmath N",	 "start N workers performing vector math ops" },
29 	{ NULL,	"vecmath-ops N", "stop after N vector math bogo operations" },
30 	{ NULL,	NULL,		 NULL }
31 };
32 
/*
 *  Only build the vector maths stressor with clang 5.0 or newer.
 *  clang 4.0 seems to spend forever optimizing this code, so the
 *  build never completes with it.
 */
#if defined(__clang__) && defined(__clang_major__)
#if (__clang_major__ < 5)
#undef HAVE_VECMATH
#endif
#endif
44 
/*
 *  gcc 5.x or earlier breaks on 128 bit vector maths on
 *  PPC64 for some reason with some flavours of the toolchain
 *  so disable this test for now.
 *
 *  clang also defines __GNUC__ (as 4) for gcc compatibility, so
 *  it is excluded explicitly; otherwise every clang build on PPC64
 *  would be treated as an old gcc and lose this stressor.
 */
#if defined(STRESS_ARCH_PPC64) && \
    defined(__GNUC__) && \
    !defined(__clang__) && \
    __GNUC__ < 6
#undef HAVE_VECMATH
#endif
55 
56 #if defined(HAVE_VECMATH)
57 
/*
 *  128 bit (16 byte) wide vector types, one per element width.
 *  The vector size is written as <number of lanes> * <element size>.
 *  Note that the 128 bit element type is unsigned whereas the
 *  narrower element types are signed.
 */
typedef int8_t  stress_vint8_t
	__attribute__ ((vector_size (16 * sizeof(int8_t))));
typedef int16_t stress_vint16_t
	__attribute__ ((vector_size (8 * sizeof(int16_t))));
typedef int32_t stress_vint32_t
	__attribute__ ((vector_size (4 * sizeof(int32_t))));
typedef int64_t stress_vint64_t
	__attribute__ ((vector_size (2 * sizeof(int64_t))));
#if defined(HAVE_INT128_T)
typedef __uint128_t stress_vint128_t
	__attribute__ ((vector_size (1 * sizeof(__uint128_t))));
#endif
65 
/*
 *  Convert various sized n * 8 bit tuples into n * 8 bit integers.
 *  The first argument becomes the most significant byte and the
 *  last argument the least significant byte of the result.
 *
 *  Every argument is parenthesized so that expression arguments
 *  (e.g. H16(x | y, z)) are cast and shifted as a whole. All shifts
 *  are performed on unsigned types and only the final result is
 *  converted to the signed type, so no shift moves a bit into a
 *  sign bit.
 */
#define H8(a0)						\
	((int8_t)((uint8_t)(a0)))
#define H16(a0, a1)					\
	((int16_t)(((uint16_t)(a0) << 8) |		\
		   ((uint16_t)(a1) << 0)))
#define H32(a0, a1, a2, a3)				\
	((int32_t)(((uint32_t)(a0) << 24) |		\
		   ((uint32_t)(a1) << 16) |		\
		   ((uint32_t)(a2) <<  8) |		\
		   ((uint32_t)(a3) <<  0)))
#define H64(a0, a1, a2, a3, a4, a5, a6, a7)		\
	((int64_t)(((uint64_t)(a0) << 56) |		\
		   ((uint64_t)(a1) << 48) |		\
		   ((uint64_t)(a2) << 40) |		\
		   ((uint64_t)(a3) << 32) |		\
		   ((uint64_t)(a4) << 24) |		\
		   ((uint64_t)(a5) << 16) |		\
		   ((uint64_t)(a6) <<  8) |		\
		   ((uint64_t)(a7) <<  0)))

#if defined(HAVE_INT128_T)
/* 128 bit variant, only available when the compiler has 128 bit integers */
#define H128(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, ac, ad, ae, af)	\
	((__int128_t)(((__uint128_t)(a0) << 120) |	\
		      ((__uint128_t)(a1) << 112) |	\
		      ((__uint128_t)(a2) << 104) |	\
		      ((__uint128_t)(a3) <<  96) |	\
		      ((__uint128_t)(a4) <<  88) |	\
		      ((__uint128_t)(a5) <<  80) |	\
		      ((__uint128_t)(a6) <<  72) |	\
		      ((__uint128_t)(a7) <<  64) |	\
		      ((__uint128_t)(a8) <<  56) |	\
		      ((__uint128_t)(a9) <<  48) |	\
		      ((__uint128_t)(aa) <<  40) |	\
		      ((__uint128_t)(ab) <<  32) |	\
		      ((__uint128_t)(ac) <<  24) |	\
		      ((__uint128_t)(ad) <<  16) |	\
		      ((__uint128_t)(ae) <<   8) |	\
		      ((__uint128_t)(af) <<   0)))
#endif
109 
/*
 *  128 bit constants, each given as 16 bytes with the most
 *  significant byte first. PACK is the name of a 16 argument macro
 *  that turns the bytes into the initializer for a particular
 *  vector type.
 */

/* all zero bytes */
#define A(PACK)							\
	PACK(0x00, 0x00, 0x00, 0x00,				\
	     0x00, 0x00, 0x00, 0x00,				\
	     0x00, 0x00, 0x00, 0x00,				\
	     0x00, 0x00, 0x00, 0x00)

/* mixed bit pattern */
#define B(PACK)							\
	PACK(0x01, 0x23, 0x45, 0x67,				\
	     0x89, 0xab, 0xcd, 0xef,				\
	     0x0f, 0x1e, 0x2d, 0x3c,				\
	     0x4b, 0x5a, 0x69, 0x78)

/* bytes cycling through the values 1, 2 and 3 */
#define C(PACK)							\
	PACK(0x01, 0x02, 0x03, 0x02,				\
	     0x01, 0x02, 0x03, 0x02,				\
	     0x03, 0x02, 0x01, 0x02,				\
	     0x03, 0x02, 0x01, 0x02)

/* bytes holding the values 1 and 2 */
#define S(PACK)							\
	PACK(0x01, 0x01, 0x01, 0x01,				\
	     0x02, 0x02, 0x02, 0x02,				\
	     0x01, 0x01, 0x02, 0x02,				\
	     0x01, 0x01, 0x02, 0x02)

/* every byte is 23 */
#define	V23(PACK)						\
	PACK(0x17, 0x17, 0x17, 0x17,				\
	     0x17, 0x17, 0x17, 0x17,				\
	     0x17, 0x17, 0x17, 0x17,				\
	     0x17, 0x17, 0x17, 0x17)

/* every byte is 3 */
#define V3(PACK)						\
	PACK(0x03, 0x03, 0x03, 0x03,				\
	     0x03, 0x03, 0x03, 0x03,				\
	     0x03, 0x03, 0x03, 0x03,				\
	     0x03, 0x03, 0x03, 0x03)
130 
/*
 *  Turn 16 x 8 bit values into the initializer list of a 128 bit
 *  vector. The macro name gives <number of elements>x<bits per
 *  element>; consecutive bytes are grouped, first byte most
 *  significant, using the matching H8/H16/H32/H64/H128 macro.
 */

/* 16 elements of 8 bits */
#define INT16x8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)	\
	H8(b0), H8(b1), H8(b2), H8(b3),						\
	H8(b4), H8(b5), H8(b6), H8(b7),						\
	H8(b8), H8(b9), H8(ba), H8(bb),						\
	H8(bc), H8(bd), H8(be), H8(bf)

/* 8 elements of 16 bits */
#define INT8x16(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)	\
	H16(b0, b1), H16(b2, b3),						\
	H16(b4, b5), H16(b6, b7),						\
	H16(b8, b9), H16(ba, bb),						\
	H16(bc, bd), H16(be, bf)

/* 4 elements of 32 bits */
#define INT4x32(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)	\
	H32(b0, b1, b2, b3),							\
	H32(b4, b5, b6, b7),							\
	H32(b8, b9, ba, bb),							\
	H32(bc, bd, be, bf)

/* 2 elements of 64 bits */
#define INT2x64(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)	\
	H64(b0, b1, b2, b3, b4, b5, b6, b7),					\
	H64(b8, b9, ba, bb, bc, bd, be, bf)

#if defined(HAVE_INT128_T)
/* 1 element of 128 bits */
#define INT1x128(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	H128(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)
#endif
154 
/*
 *  Operations to run on each vector: a mix of add, subtract,
 *  bitwise, multiply, shift, modulo and divide operations.
 *  a, b and c are modified in place; s, v23 and v3 are only read.
 */
#define OPS(a, b, c, s, v23, v3)		\
do {						\
	(a) += (b);				\
	(a) |= (b);				\
	(a) -= (b);				\
	(a) &= ~(b);				\
	(a) *= (c);				\
	(a) = ~(a);				\
	(a) *= (s);				\
	(a) ^= (c);				\
	(a) <<= 1;				\
	(b) >>= 1;				\
	(b) += (c);				\
	(a) %= (v23);				\
	(c) /= (v3);				\
	/* exchange b and c using xor */	\
	(b) ^= (c);				\
	(c) ^= (b);				\
	(b) ^= (c);				\
} while (0)
177 
178 /*
179  *  stress_vecmath()
180  *	stress GCC vector maths
181  */
182 #if defined(STRESS_ARCH_PPC64)
stress_vecmath(const stress_args_t * args)183 static int HOT stress_vecmath(const stress_args_t *args)
184 #else
185 static int HOT TARGET_CLONES stress_vecmath(const stress_args_t *args)
186 #endif
187 {
188 	stress_vint8_t a8 = { A(INT16x8) };
189 	stress_vint8_t b8 = { B(INT16x8) };
190 	stress_vint8_t c8 = { C(INT16x8) };
191 	stress_vint8_t s8 = { S(INT16x8) };
192 	const stress_vint8_t v23_8 = { V23(INT16x8) };
193 	const stress_vint8_t v3_8 = { V3(INT16x8) };
194 
195 	stress_vint16_t a16 = { A(INT8x16) };
196 	stress_vint16_t b16 = { B(INT8x16) };
197 	stress_vint16_t c16 = { C(INT8x16) };
198 	stress_vint16_t s16 = { S(INT8x16) };
199 	const stress_vint16_t v23_16 = { V23(INT8x16) };
200 	const stress_vint16_t v3_16 = { V3(INT8x16) };
201 
202 	stress_vint32_t a32 = { A(INT4x32) };
203 	stress_vint32_t b32 = { B(INT4x32) };
204 	stress_vint32_t c32 = { C(INT4x32) };
205 	stress_vint32_t s32 = { S(INT4x32) };
206 	const stress_vint32_t v23_32 = { V23(INT4x32) };
207 	const stress_vint32_t v3_32 = { V3(INT4x32) };
208 
209 	stress_vint64_t a64 = { A(INT2x64) };
210 	stress_vint64_t b64 = { B(INT2x64) };
211 	stress_vint64_t c64 = { C(INT2x64) };
212 	stress_vint64_t s64 = { S(INT2x64) };
213 	const stress_vint64_t v23_64 = { V23(INT2x64) };
214 	const stress_vint64_t v3_64 = { V3(INT2x64) };
215 
216 #if defined(HAVE_INT128_T)
217 	stress_vint128_t a128 = { A(INT1x128) };
218 	stress_vint128_t b128 = { B(INT1x128) };
219 	stress_vint128_t c128 = { C(INT1x128) };
220 	stress_vint128_t s128 = { S(INT1x128) };
221 	const stress_vint128_t v23_128 = { V23(INT1x128) };
222 	const stress_vint128_t v3_128 = { V3(INT1x128) };
223 #endif
224 
225 	stress_set_proc_state(args->name, STRESS_STATE_RUN);
226 
227 	do {
228 		int i;
229 		for (i = 1000; i; i--) {
230 			/* Good mix of vector ops */
231 			OPS(a8, b8, c8, s8, v23_8, v3_8);
232 			OPS(a16, b16, c16, s16, v23_16, v3_16);
233 			OPS(a32, b32, c32, s32, v23_32, v3_32);
234 			OPS(a64, b64, c64, s64, v23_64, v3_64);
235 #if defined(HAVE_INT128_T)
236 			OPS(a128, b128, c128, s128, v23_128, v3_128);
237 #endif
238 
239 			OPS(a32, b32, c32, s32, v23_32, v3_32);
240 			OPS(a16, b16, c16, s16, v23_16, v3_16);
241 #if defined(HAVE_INT128_T)
242 			OPS(a128, b128, c128, s128, v23_128, v3_128);
243 #endif
244 			OPS(a8, b8, c8, s8, v23_8, v3_8);
245 			OPS(a64, b64, c64, s64, v23_64, v3_64);
246 
247 			OPS(a8, b8, c8, s8, v23_8, v3_8);
248 			OPS(a8, b8, c8, s8, v23_8, v3_8);
249 			OPS(a8, b8, c8, s8, v23_8, v3_8);
250 			OPS(a8, b8, c8, s8, v23_8, v3_8);
251 
252 			OPS(a16, b16, c16, s16, v23_16, v3_16);
253 			OPS(a16, b16, c16, s16, v23_16, v3_16);
254 			OPS(a16, b16, c16, s16, v23_16, v3_16);
255 			OPS(a16, b16, c16, s16, v23_16, v3_16);
256 
257 			OPS(a32, b32, c32, s32, v23_32, v3_32);
258 			OPS(a32, b32, c32, s32, v23_32, v3_32);
259 			OPS(a32, b32, c32, s32, v23_32, v3_32);
260 			OPS(a32, b32, c32, s32, v23_32, v3_32);
261 
262 			OPS(a64, b64, c64, s64, v23_64, v3_64);
263 			OPS(a64, b64, c64, s64, v23_64, v3_64);
264 			OPS(a64, b64, c64, s64, v23_64, v3_64);
265 			OPS(a64, b64, c64, s64, v23_64, v3_64);
266 #if defined(HAVE_INT128_T)
267 			OPS(a128, b128, c128, s128, v23_128, v3_128);
268 			OPS(a128, b128, c128, s128, v23_128, v3_128);
269 			OPS(a128, b128, c128, s128, v23_128, v3_128);
270 			OPS(a128, b128, c128, s128, v23_128, v3_128);
271 #endif
272 		}
273 		inc_counter(args);
274 	} while (keep_stressing(args));
275 
276 	/* Forces the compiler to actually compute the terms */
277 	stress_uint8_put((uint8_t)(a8[0]  ^ a8[1]  ^ a8[2]  ^ a8[3]  ^
278 				   a8[4]  ^ a8[5]  ^ a8[6]  ^ a8[7]  ^
279 				   a8[8]  ^ a8[9]  ^ a8[10] ^ a8[11] ^
280 				   a8[12] ^ a8[13] ^ a8[14] ^ a8[15]));
281 	stress_uint16_put((uint16_t)(a16[0] ^ a16[1] ^ a16[2] ^ a16[3] ^
282 				     a16[4] ^ a16[5] ^ a16[6] ^ a16[7]));
283 	stress_uint32_put((uint32_t)(a32[0] ^ a32[1] ^ a32[2] ^ a32[3]));
284 	stress_uint64_put((uint64_t)(a64[0] ^ a64[1]));
285 
286 #if defined(HAVE_INT128_T)
287 	stress_uint128_put(a128[0]);
288 #endif
289 	stress_set_proc_state(args->name, STRESS_STATE_DEINIT);
290 
291 	return EXIT_SUCCESS;
292 }
293 
294 stressor_info_t stress_vecmath_info = {
295 	.stressor = stress_vecmath,
296 	.class = CLASS_CPU | CLASS_CPU_CACHE,
297 	.help = help
298 };
299 #else
300 stressor_info_t stress_vecmath_info = {
301 	.stressor = stress_not_implemented,
302 	.class = CLASS_CPU | CLASS_CPU_CACHE,
303 	.help = help
304 };
305 #endif
306