/*
 * Copyright 2020 The Emscripten Authors.  All rights reserved.
 * Emscripten is available under two separate licenses, the MIT license and the
 * University of Illinois/NCSA Open Source License.  Both these licenses can be
 * found in the LICENSE file.
 */
// This file exercises SSE2 by calling the intrinsics with a set of interesting inputs and printing the results.
// Use a diff tool to compare the results between platforms.

10 #include <emmintrin.h>
11 #define ENABLE_SSE2
12 #include "test_sse.h"
13 
14 bool testNaNBits = true;
15 
16 float *interesting_floats = get_interesting_floats();
17 int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]);
18 uint32_t *interesting_ints = get_interesting_ints();
19 int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
20 double *interesting_doubles = get_interesting_doubles();
21 int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);
22 
test_arithmetic()23 void test_arithmetic()
24 {
25 	// SSE2 Arithmetic instructions:
26 	M128i_M128i_M128i(_mm_add_epi16);
27 	M128i_M128i_M128i(_mm_add_epi32);
28 	M128i_M128i_M128i(_mm_add_epi64);
29 	M128i_M128i_M128i(_mm_add_epi8);
30 	testNaNBits = false;
31 	Ret_M128d_M128d(__m128d, _mm_add_pd);
32 	Ret_M128d_M128d(__m128d, _mm_add_sd);
33 	testNaNBits = true;
34 	M128i_M128i_M128i(_mm_adds_epi16);
35 	M128i_M128i_M128i(_mm_adds_epi8);
36 	M128i_M128i_M128i(_mm_adds_epu16);
37 	M128i_M128i_M128i(_mm_adds_epu8);
38 	Ret_M128d_M128d(__m128d, _mm_div_pd);
39 	Ret_M128d_M128d(__m128d, _mm_div_sd);
40 	M128i_M128i_M128i(_mm_madd_epi16);
41 	M128i_M128i_M128i(_mm_mul_epu32);
42 	testNaNBits = false;
43 	Ret_M128d_M128d(__m128d, _mm_mul_pd);
44 	Ret_M128d_M128d(__m128d, _mm_mul_sd);
45 	testNaNBits = true;
46 	M128i_M128i_M128i(_mm_mulhi_epi16);
47 	M128i_M128i_M128i(_mm_mulhi_epu16);
48 	M128i_M128i_M128i(_mm_mullo_epi16);
49 	M128i_M128i_M128i(_mm_sad_epu8);
50 	M128i_M128i_M128i(_mm_sub_epi16);
51 	M128i_M128i_M128i(_mm_sub_epi32);
52 	M128i_M128i_M128i(_mm_sub_epi64);
53 	M128i_M128i_M128i(_mm_sub_epi8);
54 	Ret_M128d_M128d(__m128d, _mm_sub_pd);
55 	Ret_M128d_M128d(__m128d, _mm_sub_sd);
56 	M128i_M128i_M128i(_mm_subs_epi16);
57 	M128i_M128i_M128i(_mm_subs_epi8);
58 	M128i_M128i_M128i(_mm_subs_epu16);
59 	M128i_M128i_M128i(_mm_subs_epu8);
60 }
61 
test_cast()62 void test_cast()
63 {
64 	// SSE2 Cast functions:
65 	Ret_M128d(__m128, _mm_castpd_ps);
66 	Ret_M128d(__m128i, _mm_castpd_si128);
67 	Ret_M128(__m128d, _mm_castps_pd);
68 	Ret_M128(__m128i, _mm_castps_si128);
69 	Ret_M128i(__m128d, _mm_castsi128_pd);
70 	Ret_M128i(__m128, _mm_castsi128_ps);
71 }
72 
test_compare()73 void test_compare()
74 {
75 	// SSE2 Compare instructions:
76 	M128i_M128i_M128i(_mm_cmpeq_epi16);
77 	M128i_M128i_M128i(_mm_cmpeq_epi32);
78 	M128i_M128i_M128i(_mm_cmpeq_epi8);
79 	Ret_M128d_M128d(__m128d, _mm_cmpeq_pd);
80 	Ret_M128d_M128d(__m128d, _mm_cmpeq_sd);
81 	Ret_M128d_M128d(__m128d, _mm_cmpge_pd);
82 	Ret_M128d_M128d(__m128d, _mm_cmpge_sd);
83 	M128i_M128i_M128i(_mm_cmpgt_epi16);
84 	M128i_M128i_M128i(_mm_cmpgt_epi32);
85 	M128i_M128i_M128i(_mm_cmpgt_epi8);
86 	Ret_M128d_M128d(__m128d, _mm_cmpgt_pd);
87 	Ret_M128d_M128d(__m128d, _mm_cmpgt_sd);
88 	Ret_M128d_M128d(__m128d, _mm_cmple_pd);
89 	Ret_M128d_M128d(__m128d, _mm_cmple_sd);
90 	M128i_M128i_M128i(_mm_cmplt_epi16);
91 	M128i_M128i_M128i(_mm_cmplt_epi32);
92 	M128i_M128i_M128i(_mm_cmplt_epi8);
93 	Ret_M128d_M128d(__m128d, _mm_cmplt_pd);
94 	Ret_M128d_M128d(__m128d, _mm_cmplt_sd);
95 	Ret_M128d_M128d(__m128d, _mm_cmpneq_pd);
96 	Ret_M128d_M128d(__m128d, _mm_cmpneq_sd);
97 	Ret_M128d_M128d(__m128d, _mm_cmpnge_pd);
98 	Ret_M128d_M128d(__m128d, _mm_cmpnge_sd);
99 	Ret_M128d_M128d(__m128d, _mm_cmpngt_pd);
100 	Ret_M128d_M128d(__m128d, _mm_cmpngt_sd);
101 	Ret_M128d_M128d(__m128d, _mm_cmpnle_pd);
102 	Ret_M128d_M128d(__m128d, _mm_cmpnle_sd);
103 	Ret_M128d_M128d(__m128d, _mm_cmpnlt_pd);
104 	Ret_M128d_M128d(__m128d, _mm_cmpnlt_sd);
105 	Ret_M128d_M128d(__m128d, _mm_cmpord_pd);
106 	Ret_M128d_M128d(__m128d, _mm_cmpord_sd);
107 	Ret_M128d_M128d(__m128d, _mm_cmpunord_pd);
108 	Ret_M128d_M128d(__m128d, _mm_cmpunord_sd);
109 	Ret_M128d_M128d(int, _mm_comieq_sd);
110 	Ret_M128d_M128d(int, _mm_comige_sd);
111 	Ret_M128d_M128d(int, _mm_comigt_sd);
112 	Ret_M128d_M128d(int, _mm_comile_sd);
113 	Ret_M128d_M128d(int, _mm_comilt_sd);
114 	Ret_M128d_M128d(int, _mm_comineq_sd);
115 	Ret_M128d_M128d(int, _mm_ucomieq_sd);
116 	Ret_M128d_M128d(int, _mm_ucomige_sd);
117 	Ret_M128d_M128d(int, _mm_ucomigt_sd);
118 	Ret_M128d_M128d(int, _mm_ucomile_sd);
119 	Ret_M128d_M128d(int, _mm_ucomilt_sd);
120 	Ret_M128d_M128d(int, _mm_ucomineq_sd);
121 }
122 
test_convert()123 void test_convert()
124 {
125 	// SSE2 Convert instructions:
126 	Ret_M128i(__m128d, _mm_cvtepi32_pd);
127 	Ret_M128i(__m128, _mm_cvtepi32_ps);
128 	Ret_M128d(__m128i, _mm_cvtpd_epi32);
129 	Ret_M128d(__m128, _mm_cvtpd_ps);
130 	Ret_M128(__m128i, _mm_cvtps_epi32);
131 	Ret_M128(__m128d,  _mm_cvtps_pd);
132 	Ret_M128d(double, _mm_cvtsd_f64);
133 	Ret_M128d(int, _mm_cvtsd_si32);
134 	Ret_M128d(int64_t, _mm_cvtsd_si64); // _mm_cvtsd_si64x is an alias to this.
135 	Ret_M128i(int, _mm_cvtsi128_si32);
136 	Ret_M128i(int64_t, _mm_cvtsi128_si64); // _mm_cvtsi128_si64x is an alias to this.
137 	Ret_M128d_int(__m128d, _mm_cvtsi32_sd);
138 	Ret_int(__m128i, _mm_cvtsi32_si128);
139 	Ret_M128d_int64(__m128d, _mm_cvtsi64_sd); // _mm_cvtsi64x_sd is an alias to this.
140 	Ret_int64(__m128i, _mm_cvtsi64_si128); // _mm_cvtsi64x_si128 is an alias to this.
141 	Ret_M128d_M128(__m128d, _mm_cvtss_sd);
142 	Ret_M128d(__m128i, _mm_cvttpd_epi32);
143 	Ret_M128(__m128i, _mm_cvttps_epi32);
144 	Ret_M128d(int, _mm_cvttsd_si32);
145 	Ret_M128d(int64_t, _mm_cvttsd_si64); // _mm_cvttsd_si64x is an alias to this.
146 }
test_elementarymath()147 void test_elementarymath()
148 {
149 	// SSE2 Elementary Math Functions instructions:
150 	Ret_M128d(__m128d, _mm_sqrt_pd);
151 	Ret_M128d_M128d(__m128d, _mm_sqrt_sd);
152 }
153 
test_generalsupport()154 void test_generalsupport()
155 {
156 	// SSE2 General Support instructions:
157 	_mm_clflush(interesting_floats);
158 	_mm_lfence();
159 	_mm_mfence();
160 	_mm_pause();
161 }
162 
test_load()163 void test_load()
164 {
165 	// SSE2 Load functions:
166 	Ret_DoublePtr(__m128d, _mm_load_pd, 2, 2);
167 	Ret_DoublePtr(__m128d, _mm_load_pd1, 1, 1);
168 	Ret_DoublePtr(__m128d, _mm_load_sd, 1, 1);
169 	Ret_IntPtr(__m128i, _mm_load_si128, __m128i*, 4, 4);
170 	Ret_DoublePtr(__m128d, _mm_load1_pd, 1, 1);
171 	Ret_M128d_DoublePtr(__m128d, _mm_loadh_pd, double*, 1, 1);
172 	Ret_IntPtr(__m128i, _mm_loadl_epi64, __m128i*, 2, 1);
173 	Ret_M128d_DoublePtr(__m128d, _mm_loadl_pd, double*, 1, 1);
174 	Ret_DoublePtr(__m128d, _mm_loadr_pd, 2, 2);
175 	Ret_DoublePtr(__m128d, _mm_loadu_pd, 2, 1);
176 	Ret_IntPtr(__m128i, _mm_loadu_si128, __m128i*, 4, 1);
177 	Ret_IntPtr(__m128i, _mm_loadu_si32, __m128i*, 1, 1);
178 }
179 
test_logical()180 void test_logical()
181 {
182 	// SSE2 Logical instructions:
183 	Ret_M128d_M128d(__m128d, _mm_and_pd);
184 	M128i_M128i_M128i(_mm_and_si128);
185 	Ret_M128d_M128d(__m128d, _mm_andnot_pd);
186 	M128i_M128i_M128i(_mm_andnot_si128);
187 	Ret_M128d_M128d(__m128d, _mm_or_pd);
188 	M128i_M128i_M128i(_mm_or_si128);
189 	Ret_M128d_M128d(__m128d, _mm_xor_pd);
190 	M128i_M128i_M128i(_mm_xor_si128);
191 }
192 
test_misc()193 void test_misc()
194 {
195 	// SSE2 Miscellaneous instructions:
196 	Ret_M128i(int, _mm_movemask_epi8);
197 	Ret_M128d(int, _mm_movemask_pd);
198 	M128i_M128i_M128i(_mm_packs_epi16);
199 	M128i_M128i_M128i(_mm_packs_epi32);
200 	M128i_M128i_M128i(_mm_packus_epi16);
201 }
202 
test_move()203 void test_move()
204 {
205 	// SSE2 Move instructions:
206 	Ret_M128i(__m128i, _mm_move_epi64);
207 	Ret_M128d_M128d(__m128d, _mm_move_sd);
208 }
209 
test_probability()210 void test_probability()
211 {
212 	// SSE2 Probability/Statistics instructions:
213 	M128i_M128i_M128i(_mm_avg_epu16);
214 	M128i_M128i_M128i(_mm_avg_epu8);
215 }
216 
// Placeholder for the SSE2 set intrinsics. The function body is currently
// empty: none of the intrinsics listed below is exercised yet. It is still
// called from main() so the test sequence stays complete once they are added.
void test_set()
{
/*
	// TODO: SSE2 Set functions:
	_mm_set_epi16
	_mm_set_epi32
	_mm_set_epi64
	_mm_set_epi64x
	_mm_set_epi8
	_mm_set_pd
	_mm_set_pd1
	_mm_set_sd
	_mm_set1_epi16
	_mm_set1_epi32
	_mm_set1_epi64
	_mm_set1_epi64x
	_mm_set1_epi8
	_mm_set1_pd
	_mm_setr_epi16
	_mm_setr_epi32
	_mm_setr_epi64
	_mm_setr_epi8
	_mm_setr_pd
	_mm_setzero_pd
	_mm_setzero_si128
*/
}

test_shift()245 void test_shift()
246 {
247 	// SSE2 Shift instructions:
248 	M128i_M128i_M128i(_mm_sll_epi16);
249 	M128i_M128i_M128i(_mm_sll_epi32);
250 	M128i_M128i_M128i(_mm_sll_epi64);
251 	Ret_M128i_Tint(__m128i, _mm_slli_epi16);
252 	Ret_M128i_Tint(__m128i, _mm_slli_epi32);
253 	Ret_M128i_Tint(__m128i, _mm_slli_epi64);
254 	Ret_M128i_Tint(__m128i, _mm_slli_si128); // _mm_bslli_si128 is an alias to this.
255 	M128i_M128i_M128i(_mm_sra_epi16);
256 	M128i_M128i_M128i(_mm_sra_epi32);
257 	Ret_M128i_Tint(__m128i, _mm_srai_epi16);
258 	Ret_M128i_Tint(__m128i, _mm_srai_epi32);
259 	M128i_M128i_M128i(_mm_srl_epi16);
260 	M128i_M128i_M128i(_mm_srl_epi32);
261 	M128i_M128i_M128i(_mm_srl_epi64);
262 	Ret_M128i_Tint(__m128i, _mm_srli_epi16);
263 	Ret_M128i_Tint(__m128i, _mm_srli_epi32);
264 	Ret_M128i_Tint(__m128i, _mm_srli_epi64);
265 	Ret_M128i_Tint(__m128i, _mm_srli_si128); // _mm_bsrli_si128 is an alias to this.
266 }
267 
test_specialmath()268 void test_specialmath()
269 {
270 	// SSE2 Special Math instructions:
271 	M128i_M128i_M128i(_mm_max_epi16);
272 	M128i_M128i_M128i(_mm_max_epu8);
273 	Ret_M128d_M128d(__m128d, _mm_max_pd);
274 	Ret_M128d_M128d(__m128d, _mm_max_sd);
275 	M128i_M128i_M128i(_mm_min_epi16);
276 	M128i_M128i_M128i(_mm_min_epu8);
277 	Ret_M128d_M128d(__m128d, _mm_min_pd);
278 	Ret_M128d_M128d(__m128d, _mm_min_sd);
279 }
280 
test_store()281 void test_store()
282 {
283 	// SSE2 Store instructions:
284 	void_M128i_M128i_OutIntPtr(_mm_maskmoveu_si128, char*, 16, 1);
285 	void_OutDoublePtr_M128d(_mm_store_pd, double*, 16, 16);
286 	void_OutDoublePtr_M128d(_mm_store_sd, double*, 8, 1);
287 	void_OutIntPtr_M128i(_mm_store_si128, __m128i*, 16, 16);
288 	void_OutDoublePtr_M128d(_mm_store1_pd, double*, 16, 16); // _mm_store_pd1 is an alias to this.
289 	void_OutDoublePtr_M128d(_mm_storeh_pd, double*, 8, 1);
290 	void_OutIntPtr_M128i(_mm_storel_epi64, __m128i*, 8, 1);
291 	void_OutDoublePtr_M128d(_mm_storel_pd, double*, 8, 1);
292 	void_OutDoublePtr_M128d(_mm_storer_pd, double*, 16, 16);
293 	void_OutDoublePtr_M128d(_mm_storeu_pd, double*, 16, 1);
294 	void_OutIntPtr_M128i(_mm_storeu_si32, __m128i*, 4, 1);
295 	void_OutIntPtr_M128i(_mm_storeu_si128, __m128i*, 16, 1);
296 	void_OutDoublePtr_M128d(_mm_stream_pd, double*, 16, 16);
297 	void_OutIntPtr_M128i(_mm_stream_si128, __m128i*, 16, 16);
298 	void_OutIntPtr_int(_mm_stream_si32, int*, 4, 1);
299 	void_OutIntPtr_int64(_mm_stream_si64, long long*, 8, 1);
300 }
301 
test_swizzle()302 void test_swizzle()
303 {
304 	// SSE2 Swizzle instructions:
305 	Ret_M128i_Tint(int, _mm_extract_epi16);
306 	Ret_M128i_int_Tint(__m128i, _mm_insert_epi16);
307 	Ret_M128i_Tint(__m128i, _mm_shuffle_epi32);
308 	Ret_M128d_M128d_Tint(__m128d, _mm_shuffle_pd);
309 	Ret_M128i_Tint(__m128i, _mm_shufflehi_epi16);
310 	Ret_M128i_Tint(__m128i, _mm_shufflelo_epi16);
311 	M128i_M128i_M128i(_mm_unpackhi_epi16);
312 	M128i_M128i_M128i(_mm_unpackhi_epi32);
313 	M128i_M128i_M128i(_mm_unpackhi_epi64);
314 	M128i_M128i_M128i(_mm_unpackhi_epi8);
315 	Ret_M128d_M128d(__m128d, _mm_unpackhi_pd);
316 	M128i_M128i_M128i(_mm_unpacklo_epi16);
317 	M128i_M128i_M128i(_mm_unpacklo_epi32);
318 	M128i_M128i_M128i(_mm_unpacklo_epi64);
319 	M128i_M128i_M128i(_mm_unpacklo_epi8);
320 	Ret_M128d_M128d(__m128d, _mm_unpacklo_pd);
321 }
322 
main()323 int main()
324 {
325 	assert(numInterestingFloats % 4 == 0);
326 	assert(numInterestingInts % 4 == 0);
327 	assert(numInterestingDoubles % 4 == 0);
328 
329 	test_arithmetic();
330 	test_cast();
331 	test_compare();
332 	test_convert();
333 	test_elementarymath();
334 	test_generalsupport();
335 	test_load();
336 	test_logical();
337 	test_misc();
338 	test_move();
339 	test_probability();
340 	test_set();
341 	test_shift();
342 	test_specialmath();
343 	test_store();
344 	test_swizzle();
345 
346 #ifdef __EMSCRIPTEN__
347 	_mm_undefined_si128();
348 	_mm_undefined_pd();
349 #endif
350 }
351