1 /*
2 * Copyright 2020 The Emscripten Authors. All rights reserved.
3 * Emscripten is available under two separate licenses, the MIT license and the
4 * University of Illinois/NCSA Open Source License. Both these licenses can be
5 * found in the LICENSE file.
6 */
// This file exercises SSE2 by calling the intrinsics with a range of interesting inputs and printing the results.
// Use a diff tool to compare the results between platforms.
9
10 #include <emmintrin.h>
11 #define ENABLE_SSE2
12 #include "test_sse.h"
13
14 bool testNaNBits = true;
15
16 float *interesting_floats = get_interesting_floats();
17 int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]);
18 uint32_t *interesting_ints = get_interesting_ints();
19 int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
20 double *interesting_doubles = get_interesting_doubles();
21 int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);
22
test_arithmetic()23 void test_arithmetic()
24 {
25 // SSE2 Arithmetic instructions:
26 M128i_M128i_M128i(_mm_add_epi16);
27 M128i_M128i_M128i(_mm_add_epi32);
28 M128i_M128i_M128i(_mm_add_epi64);
29 M128i_M128i_M128i(_mm_add_epi8);
30 testNaNBits = false;
31 Ret_M128d_M128d(__m128d, _mm_add_pd);
32 Ret_M128d_M128d(__m128d, _mm_add_sd);
33 testNaNBits = true;
34 M128i_M128i_M128i(_mm_adds_epi16);
35 M128i_M128i_M128i(_mm_adds_epi8);
36 M128i_M128i_M128i(_mm_adds_epu16);
37 M128i_M128i_M128i(_mm_adds_epu8);
38 Ret_M128d_M128d(__m128d, _mm_div_pd);
39 Ret_M128d_M128d(__m128d, _mm_div_sd);
40 M128i_M128i_M128i(_mm_madd_epi16);
41 M128i_M128i_M128i(_mm_mul_epu32);
42 testNaNBits = false;
43 Ret_M128d_M128d(__m128d, _mm_mul_pd);
44 Ret_M128d_M128d(__m128d, _mm_mul_sd);
45 testNaNBits = true;
46 M128i_M128i_M128i(_mm_mulhi_epi16);
47 M128i_M128i_M128i(_mm_mulhi_epu16);
48 M128i_M128i_M128i(_mm_mullo_epi16);
49 M128i_M128i_M128i(_mm_sad_epu8);
50 M128i_M128i_M128i(_mm_sub_epi16);
51 M128i_M128i_M128i(_mm_sub_epi32);
52 M128i_M128i_M128i(_mm_sub_epi64);
53 M128i_M128i_M128i(_mm_sub_epi8);
54 Ret_M128d_M128d(__m128d, _mm_sub_pd);
55 Ret_M128d_M128d(__m128d, _mm_sub_sd);
56 M128i_M128i_M128i(_mm_subs_epi16);
57 M128i_M128i_M128i(_mm_subs_epi8);
58 M128i_M128i_M128i(_mm_subs_epu16);
59 M128i_M128i_M128i(_mm_subs_epu8);
60 }
61
test_cast()62 void test_cast()
63 {
64 // SSE2 Cast functions:
65 Ret_M128d(__m128, _mm_castpd_ps);
66 Ret_M128d(__m128i, _mm_castpd_si128);
67 Ret_M128(__m128d, _mm_castps_pd);
68 Ret_M128(__m128i, _mm_castps_si128);
69 Ret_M128i(__m128d, _mm_castsi128_pd);
70 Ret_M128i(__m128, _mm_castsi128_ps);
71 }
72
test_compare()73 void test_compare()
74 {
75 // SSE2 Compare instructions:
76 M128i_M128i_M128i(_mm_cmpeq_epi16);
77 M128i_M128i_M128i(_mm_cmpeq_epi32);
78 M128i_M128i_M128i(_mm_cmpeq_epi8);
79 Ret_M128d_M128d(__m128d, _mm_cmpeq_pd);
80 Ret_M128d_M128d(__m128d, _mm_cmpeq_sd);
81 Ret_M128d_M128d(__m128d, _mm_cmpge_pd);
82 Ret_M128d_M128d(__m128d, _mm_cmpge_sd);
83 M128i_M128i_M128i(_mm_cmpgt_epi16);
84 M128i_M128i_M128i(_mm_cmpgt_epi32);
85 M128i_M128i_M128i(_mm_cmpgt_epi8);
86 Ret_M128d_M128d(__m128d, _mm_cmpgt_pd);
87 Ret_M128d_M128d(__m128d, _mm_cmpgt_sd);
88 Ret_M128d_M128d(__m128d, _mm_cmple_pd);
89 Ret_M128d_M128d(__m128d, _mm_cmple_sd);
90 M128i_M128i_M128i(_mm_cmplt_epi16);
91 M128i_M128i_M128i(_mm_cmplt_epi32);
92 M128i_M128i_M128i(_mm_cmplt_epi8);
93 Ret_M128d_M128d(__m128d, _mm_cmplt_pd);
94 Ret_M128d_M128d(__m128d, _mm_cmplt_sd);
95 Ret_M128d_M128d(__m128d, _mm_cmpneq_pd);
96 Ret_M128d_M128d(__m128d, _mm_cmpneq_sd);
97 Ret_M128d_M128d(__m128d, _mm_cmpnge_pd);
98 Ret_M128d_M128d(__m128d, _mm_cmpnge_sd);
99 Ret_M128d_M128d(__m128d, _mm_cmpngt_pd);
100 Ret_M128d_M128d(__m128d, _mm_cmpngt_sd);
101 Ret_M128d_M128d(__m128d, _mm_cmpnle_pd);
102 Ret_M128d_M128d(__m128d, _mm_cmpnle_sd);
103 Ret_M128d_M128d(__m128d, _mm_cmpnlt_pd);
104 Ret_M128d_M128d(__m128d, _mm_cmpnlt_sd);
105 Ret_M128d_M128d(__m128d, _mm_cmpord_pd);
106 Ret_M128d_M128d(__m128d, _mm_cmpord_sd);
107 Ret_M128d_M128d(__m128d, _mm_cmpunord_pd);
108 Ret_M128d_M128d(__m128d, _mm_cmpunord_sd);
109 Ret_M128d_M128d(int, _mm_comieq_sd);
110 Ret_M128d_M128d(int, _mm_comige_sd);
111 Ret_M128d_M128d(int, _mm_comigt_sd);
112 Ret_M128d_M128d(int, _mm_comile_sd);
113 Ret_M128d_M128d(int, _mm_comilt_sd);
114 Ret_M128d_M128d(int, _mm_comineq_sd);
115 Ret_M128d_M128d(int, _mm_ucomieq_sd);
116 Ret_M128d_M128d(int, _mm_ucomige_sd);
117 Ret_M128d_M128d(int, _mm_ucomigt_sd);
118 Ret_M128d_M128d(int, _mm_ucomile_sd);
119 Ret_M128d_M128d(int, _mm_ucomilt_sd);
120 Ret_M128d_M128d(int, _mm_ucomineq_sd);
121 }
122
test_convert()123 void test_convert()
124 {
125 // SSE2 Convert instructions:
126 Ret_M128i(__m128d, _mm_cvtepi32_pd);
127 Ret_M128i(__m128, _mm_cvtepi32_ps);
128 Ret_M128d(__m128i, _mm_cvtpd_epi32);
129 Ret_M128d(__m128, _mm_cvtpd_ps);
130 Ret_M128(__m128i, _mm_cvtps_epi32);
131 Ret_M128(__m128d, _mm_cvtps_pd);
132 Ret_M128d(double, _mm_cvtsd_f64);
133 Ret_M128d(int, _mm_cvtsd_si32);
134 Ret_M128d(int64_t, _mm_cvtsd_si64); // _mm_cvtsd_si64x is an alias to this.
135 Ret_M128i(int, _mm_cvtsi128_si32);
136 Ret_M128i(int64_t, _mm_cvtsi128_si64); // _mm_cvtsi128_si64x is an alias to this.
137 Ret_M128d_int(__m128d, _mm_cvtsi32_sd);
138 Ret_int(__m128i, _mm_cvtsi32_si128);
139 Ret_M128d_int64(__m128d, _mm_cvtsi64_sd); // _mm_cvtsi64x_sd is an alias to this.
140 Ret_int64(__m128i, _mm_cvtsi64_si128); // _mm_cvtsi64x_si128 is an alias to this.
141 Ret_M128d_M128(__m128d, _mm_cvtss_sd);
142 Ret_M128d(__m128i, _mm_cvttpd_epi32);
143 Ret_M128(__m128i, _mm_cvttps_epi32);
144 Ret_M128d(int, _mm_cvttsd_si32);
145 Ret_M128d(int64_t, _mm_cvttsd_si64); // _mm_cvttsd_si64x is an alias to this.
146 }
test_elementarymath()147 void test_elementarymath()
148 {
149 // SSE2 Elementary Math Functions instructions:
150 Ret_M128d(__m128d, _mm_sqrt_pd);
151 Ret_M128d_M128d(__m128d, _mm_sqrt_sd);
152 }
153
test_generalsupport()154 void test_generalsupport()
155 {
156 // SSE2 General Support instructions:
157 _mm_clflush(interesting_floats);
158 _mm_lfence();
159 _mm_mfence();
160 _mm_pause();
161 }
162
test_load()163 void test_load()
164 {
165 // SSE2 Load functions:
166 Ret_DoublePtr(__m128d, _mm_load_pd, 2, 2);
167 Ret_DoublePtr(__m128d, _mm_load_pd1, 1, 1);
168 Ret_DoublePtr(__m128d, _mm_load_sd, 1, 1);
169 Ret_IntPtr(__m128i, _mm_load_si128, __m128i*, 4, 4);
170 Ret_DoublePtr(__m128d, _mm_load1_pd, 1, 1);
171 Ret_M128d_DoublePtr(__m128d, _mm_loadh_pd, double*, 1, 1);
172 Ret_IntPtr(__m128i, _mm_loadl_epi64, __m128i*, 2, 1);
173 Ret_M128d_DoublePtr(__m128d, _mm_loadl_pd, double*, 1, 1);
174 Ret_DoublePtr(__m128d, _mm_loadr_pd, 2, 2);
175 Ret_DoublePtr(__m128d, _mm_loadu_pd, 2, 1);
176 Ret_IntPtr(__m128i, _mm_loadu_si128, __m128i*, 4, 1);
177 Ret_IntPtr(__m128i, _mm_loadu_si32, __m128i*, 1, 1);
178 }
179
test_logical()180 void test_logical()
181 {
182 // SSE2 Logical instructions:
183 Ret_M128d_M128d(__m128d, _mm_and_pd);
184 M128i_M128i_M128i(_mm_and_si128);
185 Ret_M128d_M128d(__m128d, _mm_andnot_pd);
186 M128i_M128i_M128i(_mm_andnot_si128);
187 Ret_M128d_M128d(__m128d, _mm_or_pd);
188 M128i_M128i_M128i(_mm_or_si128);
189 Ret_M128d_M128d(__m128d, _mm_xor_pd);
190 M128i_M128i_M128i(_mm_xor_si128);
191 }
192
test_misc()193 void test_misc()
194 {
195 // SSE2 Miscellaneous instructions:
196 Ret_M128i(int, _mm_movemask_epi8);
197 Ret_M128d(int, _mm_movemask_pd);
198 M128i_M128i_M128i(_mm_packs_epi16);
199 M128i_M128i_M128i(_mm_packs_epi32);
200 M128i_M128i_M128i(_mm_packus_epi16);
201 }
202
test_move()203 void test_move()
204 {
205 // SSE2 Move instructions:
206 Ret_M128i(__m128i, _mm_move_epi64);
207 Ret_M128d_M128d(__m128d, _mm_move_sd);
208 }
209
test_probability()210 void test_probability()
211 {
212 // SSE2 Probability/Statistics instructions:
213 M128i_M128i_M128i(_mm_avg_epu16);
214 M128i_M128i_M128i(_mm_avg_epu8);
215 }
216
test_set()217 void test_set()
218 {
219 /*
220 // TODO: SSE2 Set functions:
221 _mm_set_epi16
222 _mm_set_epi32
223 _mm_set_epi64
224 _mm_set_epi64x
225 _mm_set_epi8
226 _mm_set_pd
227 _mm_set_pd1
228 _mm_set_sd
229 _mm_set1_epi16
230 _mm_set1_epi32
231 _mm_set1_epi64
232 _mm_set1_epi64x
233 _mm_set1_epi8
234 _mm_set1_pd
235 _mm_setr_epi16
236 _mm_setr_epi32
237 _mm_setr_epi64
238 _mm_setr_epi8
239 _mm_setr_pd
240 _mm_setzero_pd
241 _mm_setzero_si128
242 */
243 }
244
test_shift()245 void test_shift()
246 {
247 // SSE2 Shift instructions:
248 M128i_M128i_M128i(_mm_sll_epi16);
249 M128i_M128i_M128i(_mm_sll_epi32);
250 M128i_M128i_M128i(_mm_sll_epi64);
251 Ret_M128i_Tint(__m128i, _mm_slli_epi16);
252 Ret_M128i_Tint(__m128i, _mm_slli_epi32);
253 Ret_M128i_Tint(__m128i, _mm_slli_epi64);
254 Ret_M128i_Tint(__m128i, _mm_slli_si128); // _mm_bslli_si128 is an alias to this.
255 M128i_M128i_M128i(_mm_sra_epi16);
256 M128i_M128i_M128i(_mm_sra_epi32);
257 Ret_M128i_Tint(__m128i, _mm_srai_epi16);
258 Ret_M128i_Tint(__m128i, _mm_srai_epi32);
259 M128i_M128i_M128i(_mm_srl_epi16);
260 M128i_M128i_M128i(_mm_srl_epi32);
261 M128i_M128i_M128i(_mm_srl_epi64);
262 Ret_M128i_Tint(__m128i, _mm_srli_epi16);
263 Ret_M128i_Tint(__m128i, _mm_srli_epi32);
264 Ret_M128i_Tint(__m128i, _mm_srli_epi64);
265 Ret_M128i_Tint(__m128i, _mm_srli_si128); // _mm_bsrli_si128 is an alias to this.
266 }
267
test_specialmath()268 void test_specialmath()
269 {
270 // SSE2 Special Math instructions:
271 M128i_M128i_M128i(_mm_max_epi16);
272 M128i_M128i_M128i(_mm_max_epu8);
273 Ret_M128d_M128d(__m128d, _mm_max_pd);
274 Ret_M128d_M128d(__m128d, _mm_max_sd);
275 M128i_M128i_M128i(_mm_min_epi16);
276 M128i_M128i_M128i(_mm_min_epu8);
277 Ret_M128d_M128d(__m128d, _mm_min_pd);
278 Ret_M128d_M128d(__m128d, _mm_min_sd);
279 }
280
test_store()281 void test_store()
282 {
283 // SSE2 Store instructions:
284 void_M128i_M128i_OutIntPtr(_mm_maskmoveu_si128, char*, 16, 1);
285 void_OutDoublePtr_M128d(_mm_store_pd, double*, 16, 16);
286 void_OutDoublePtr_M128d(_mm_store_sd, double*, 8, 1);
287 void_OutIntPtr_M128i(_mm_store_si128, __m128i*, 16, 16);
288 void_OutDoublePtr_M128d(_mm_store1_pd, double*, 16, 16); // _mm_store_pd1 is an alias to this.
289 void_OutDoublePtr_M128d(_mm_storeh_pd, double*, 8, 1);
290 void_OutIntPtr_M128i(_mm_storel_epi64, __m128i*, 8, 1);
291 void_OutDoublePtr_M128d(_mm_storel_pd, double*, 8, 1);
292 void_OutDoublePtr_M128d(_mm_storer_pd, double*, 16, 16);
293 void_OutDoublePtr_M128d(_mm_storeu_pd, double*, 16, 1);
294 void_OutIntPtr_M128i(_mm_storeu_si32, __m128i*, 4, 1);
295 void_OutIntPtr_M128i(_mm_storeu_si128, __m128i*, 16, 1);
296 void_OutDoublePtr_M128d(_mm_stream_pd, double*, 16, 16);
297 void_OutIntPtr_M128i(_mm_stream_si128, __m128i*, 16, 16);
298 void_OutIntPtr_int(_mm_stream_si32, int*, 4, 1);
299 void_OutIntPtr_int64(_mm_stream_si64, long long*, 8, 1);
300 }
301
test_swizzle()302 void test_swizzle()
303 {
304 // SSE2 Swizzle instructions:
305 Ret_M128i_Tint(int, _mm_extract_epi16);
306 Ret_M128i_int_Tint(__m128i, _mm_insert_epi16);
307 Ret_M128i_Tint(__m128i, _mm_shuffle_epi32);
308 Ret_M128d_M128d_Tint(__m128d, _mm_shuffle_pd);
309 Ret_M128i_Tint(__m128i, _mm_shufflehi_epi16);
310 Ret_M128i_Tint(__m128i, _mm_shufflelo_epi16);
311 M128i_M128i_M128i(_mm_unpackhi_epi16);
312 M128i_M128i_M128i(_mm_unpackhi_epi32);
313 M128i_M128i_M128i(_mm_unpackhi_epi64);
314 M128i_M128i_M128i(_mm_unpackhi_epi8);
315 Ret_M128d_M128d(__m128d, _mm_unpackhi_pd);
316 M128i_M128i_M128i(_mm_unpacklo_epi16);
317 M128i_M128i_M128i(_mm_unpacklo_epi32);
318 M128i_M128i_M128i(_mm_unpacklo_epi64);
319 M128i_M128i_M128i(_mm_unpacklo_epi8);
320 Ret_M128d_M128d(__m128d, _mm_unpacklo_pd);
321 }
322
main()323 int main()
324 {
325 assert(numInterestingFloats % 4 == 0);
326 assert(numInterestingInts % 4 == 0);
327 assert(numInterestingDoubles % 4 == 0);
328
329 test_arithmetic();
330 test_cast();
331 test_compare();
332 test_convert();
333 test_elementarymath();
334 test_generalsupport();
335 test_load();
336 test_logical();
337 test_misc();
338 test_move();
339 test_probability();
340 test_set();
341 test_shift();
342 test_specialmath();
343 test_store();
344 test_swizzle();
345
346 #ifdef __EMSCRIPTEN__
347 _mm_undefined_si128();
348 _mm_undefined_pd();
349 #endif
350 }
351