1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
2 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
3 // RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
4
5
6 #include <immintrin.h>
7
8 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
9
// Exercises _mm_add_epi8; the emitted IR is expected to contain `add <16 x i8>`.
test_mm_add_epi8(__m128i A,__m128i B)10 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
11 // CHECK-LABEL: test_mm_add_epi8
12 // CHECK: add <16 x i8>
13 return _mm_add_epi8(A, B);
14 }
15
// Exercises _mm_add_epi16; the emitted IR is expected to contain `add <8 x i16>`.
test_mm_add_epi16(__m128i A,__m128i B)16 __m128i test_mm_add_epi16(__m128i A, __m128i B) {
17 // CHECK-LABEL: test_mm_add_epi16
18 // CHECK: add <8 x i16>
19 return _mm_add_epi16(A, B);
20 }
21
// Exercises _mm_add_epi32; the emitted IR is expected to contain `add <4 x i32>`.
test_mm_add_epi32(__m128i A,__m128i B)22 __m128i test_mm_add_epi32(__m128i A, __m128i B) {
23 // CHECK-LABEL: test_mm_add_epi32
24 // CHECK: add <4 x i32>
25 return _mm_add_epi32(A, B);
26 }
27
// Exercises _mm_add_epi64; the emitted IR is expected to contain `add <2 x i64>`.
test_mm_add_epi64(__m128i A,__m128i B)28 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
29 // CHECK-LABEL: test_mm_add_epi64
30 // CHECK: add <2 x i64>
31 return _mm_add_epi64(A, B);
32 }
33
// Exercises _mm_add_pd; the emitted IR is expected to contain `fadd <2 x double>`.
test_mm_add_pd(__m128d A,__m128d B)34 __m128d test_mm_add_pd(__m128d A, __m128d B) {
35 // CHECK-LABEL: test_mm_add_pd
36 // CHECK: fadd <2 x double>
37 return _mm_add_pd(A, B);
38 }
39
// Exercises _mm_add_sd; expects two lane-0 extractelements, a scalar
// `fadd double`, and an insertelement that writes a double into lane 0.
test_mm_add_sd(__m128d A,__m128d B)40 __m128d test_mm_add_sd(__m128d A, __m128d B) {
41 // CHECK-LABEL: test_mm_add_sd
42 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
43 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
44 // CHECK: fadd double
45 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
46 return _mm_add_sd(A, B);
47 }
48
// Exercises _mm_adds_epi8; expects a call to the generic @llvm.sadd.sat.v16i8 intrinsic.
test_mm_adds_epi8(__m128i A,__m128i B)49 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
50 // CHECK-LABEL: test_mm_adds_epi8
51 // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
52 return _mm_adds_epi8(A, B);
53 }
54
// Exercises _mm_adds_epi16; expects a call to the generic @llvm.sadd.sat.v8i16 intrinsic.
test_mm_adds_epi16(__m128i A,__m128i B)55 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
56 // CHECK-LABEL: test_mm_adds_epi16
57 // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
58 return _mm_adds_epi16(A, B);
59 }
60
// Exercises _mm_adds_epu8; expects a call to @llvm.uadd.sat.v16i8 and requires
// that no @llvm.x86.sse2.paddus.b call appears between the label and that call.
test_mm_adds_epu8(__m128i A,__m128i B)61 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
62 // CHECK-LABEL: test_mm_adds_epu8
63 // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
64 // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
65 return _mm_adds_epu8(A, B);
66 }
67
// Exercises _mm_adds_epu16; expects a call to @llvm.uadd.sat.v8i16 and requires
// that no @llvm.x86.sse2.paddus.w call appears between the label and that call.
test_mm_adds_epu16(__m128i A,__m128i B)68 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
69 // CHECK-LABEL: test_mm_adds_epu16
70 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
71 // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
72 return _mm_adds_epu16(A, B);
73 }
74
// Exercises _mm_and_pd; although the arguments are __m128d, the expected IR
// operation is an integer `and <2 x i64>`.
test_mm_and_pd(__m128d A,__m128d B)75 __m128d test_mm_and_pd(__m128d A, __m128d B) {
76 // CHECK-LABEL: test_mm_and_pd
77 // CHECK: and <2 x i64>
78 return _mm_and_pd(A, B);
79 }
80
// Exercises _mm_and_si128; the emitted IR is expected to contain `and <2 x i64>`.
test_mm_and_si128(__m128i A,__m128i B)81 __m128i test_mm_and_si128(__m128i A, __m128i B) {
82 // CHECK-LABEL: test_mm_and_si128
83 // CHECK: and <2 x i64>
84 return _mm_and_si128(A, B);
85 }
86
// Exercises _mm_andnot_pd; expects an xor with an all-ones <2 x i64> constant
// followed by `and <2 x i64>`.
test_mm_andnot_pd(__m128d A,__m128d B)87 __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
88 // CHECK-LABEL: test_mm_andnot_pd
89 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
90 // CHECK: and <2 x i64>
91 return _mm_andnot_pd(A, B);
92 }
93
// Exercises _mm_andnot_si128; expects an xor with an all-ones <2 x i64> constant
// followed by `and <2 x i64>`.
test_mm_andnot_si128(__m128i A,__m128i B)94 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
95 // CHECK-LABEL: test_mm_andnot_si128
96 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
97 // CHECK: and <2 x i64>
98 return _mm_andnot_si128(A, B);
99 }
100
// Exercises _mm_avg_epu8; expects a call to the target intrinsic @llvm.x86.sse2.pavg.b.
test_mm_avg_epu8(__m128i A,__m128i B)101 __m128i test_mm_avg_epu8(__m128i A, __m128i B) {
102 // CHECK-LABEL: test_mm_avg_epu8
103 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
104 return _mm_avg_epu8(A, B);
105 }
106
// Exercises _mm_avg_epu16; expects a call to the target intrinsic @llvm.x86.sse2.pavg.w.
test_mm_avg_epu16(__m128i A,__m128i B)107 __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
108 // CHECK-LABEL: test_mm_avg_epu16
109 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
110 return _mm_avg_epu16(A, B);
111 }
112
// Exercises _mm_bslli_si128 with an immediate of 5; expects a byte shufflevector
// whose first operand is a zero vector and whose mask selects indices 11..26.
test_mm_bslli_si128(__m128i A)113 __m128i test_mm_bslli_si128(__m128i A) {
114 // CHECK-LABEL: test_mm_bslli_si128
115 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
116 return _mm_bslli_si128(A, 5);
117 }
118
// Exercises _mm_bsrli_si128 with an immediate of 5; expects a byte shufflevector
// whose second operand is a zero vector and whose mask selects indices 5..20.
test_mm_bsrli_si128(__m128i A)119 __m128i test_mm_bsrli_si128(__m128i A) {
120 // CHECK-LABEL: test_mm_bsrli_si128
121 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
122 return _mm_bsrli_si128(A, 5);
123 }
124
// Exercises _mm_castpd_ps; expects a bitcast from <2 x double> to <4 x float>.
test_mm_castpd_ps(__m128d A)125 __m128 test_mm_castpd_ps(__m128d A) {
126 // CHECK-LABEL: test_mm_castpd_ps
127 // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
128 return _mm_castpd_ps(A);
129 }
130
// Exercises _mm_castpd_si128; expects a bitcast from <2 x double> to <2 x i64>.
test_mm_castpd_si128(__m128d A)131 __m128i test_mm_castpd_si128(__m128d A) {
132 // CHECK-LABEL: test_mm_castpd_si128
133 // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
134 return _mm_castpd_si128(A);
135 }
136
// Exercises _mm_castps_pd; expects a bitcast from <4 x float> to <2 x double>.
test_mm_castps_pd(__m128 A)137 __m128d test_mm_castps_pd(__m128 A) {
138 // CHECK-LABEL: test_mm_castps_pd
139 // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
140 return _mm_castps_pd(A);
141 }
142
// Exercises _mm_castps_si128; expects a bitcast from <4 x float> to <2 x i64>.
test_mm_castps_si128(__m128 A)143 __m128i test_mm_castps_si128(__m128 A) {
144 // CHECK-LABEL: test_mm_castps_si128
145 // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
146 return _mm_castps_si128(A);
147 }
148
// Exercises _mm_castsi128_pd; expects a bitcast from <2 x i64> to <2 x double>.
test_mm_castsi128_pd(__m128i A)149 __m128d test_mm_castsi128_pd(__m128i A) {
150 // CHECK-LABEL: test_mm_castsi128_pd
151 // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
152 return _mm_castsi128_pd(A);
153 }
154
// Exercises _mm_castsi128_ps; expects a bitcast from <2 x i64> to <4 x float>.
test_mm_castsi128_ps(__m128i A)155 __m128 test_mm_castsi128_ps(__m128i A) {
156 // CHECK-LABEL: test_mm_castsi128_ps
157 // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
158 return _mm_castsi128_ps(A);
159 }
160
// Exercises _mm_clflush; expects a call to @llvm.x86.sse2.clflush taking an i8* argument.
test_mm_clflush(void * A)161 void test_mm_clflush(void* A) {
162 // CHECK-LABEL: test_mm_clflush
163 // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
164 _mm_clflush(A);
165 }
166
// Exercises _mm_cmpeq_epi8; the emitted IR is expected to contain `icmp eq <16 x i8>`.
test_mm_cmpeq_epi8(__m128i A,__m128i B)167 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
168 // CHECK-LABEL: test_mm_cmpeq_epi8
169 // CHECK: icmp eq <16 x i8>
170 return _mm_cmpeq_epi8(A, B);
171 }
172
// Exercises _mm_cmpeq_epi16; the emitted IR is expected to contain `icmp eq <8 x i16>`.
test_mm_cmpeq_epi16(__m128i A,__m128i B)173 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
174 // CHECK-LABEL: test_mm_cmpeq_epi16
175 // CHECK: icmp eq <8 x i16>
176 return _mm_cmpeq_epi16(A, B);
177 }
178
// Exercises _mm_cmpeq_epi32; the emitted IR is expected to contain `icmp eq <4 x i32>`.
test_mm_cmpeq_epi32(__m128i A,__m128i B)179 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
180 // CHECK-LABEL: test_mm_cmpeq_epi32
181 // CHECK: icmp eq <4 x i32>
182 return _mm_cmpeq_epi32(A, B);
183 }
184
// Exercises _mm_cmpeq_pd; expects `fcmp oeq` on <2 x double>, then on the
// immediately following lines a sext to <2 x i64>, a bitcast back to
// <2 x double>, and the ret of that bitcast.
test_mm_cmpeq_pd(__m128d A,__m128d B)185 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
186 // CHECK-LABEL: test_mm_cmpeq_pd
187 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
188 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
189 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
190 // CHECK-NEXT: ret <2 x double> [[BC]]
191 return _mm_cmpeq_pd(A, B);
192 }
193
// Exercises _mm_cmpeq_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 0`.
test_mm_cmpeq_sd(__m128d A,__m128d B)194 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
195 // CHECK-LABEL: test_mm_cmpeq_sd
196 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
197 return _mm_cmpeq_sd(A, B);
198 }
199
// Exercises _mm_cmpge_pd; expects `fcmp ole` followed directly by sext,
// bitcast, and ret.
// NOTE(review): the expected predicate is `ole` rather than `oge`; presumably
// the intrinsic compares with its operands swapped -- confirm against the header.
test_mm_cmpge_pd(__m128d A,__m128d B)200 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
201 // CHECK-LABEL: test_mm_cmpge_pd
202 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
203 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
204 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
205 // CHECK-NEXT: ret <2 x double> [[BC]]
206 return _mm_cmpge_pd(A, B);
207 }
208
// Exercises _mm_cmpge_sd; expects a cmp.sd call with immediate `i8 2`, then an
// extract/insert pair for lane 0 and another for lane 1.
// NOTE(review): the lane shuffling presumably rebuilds the result after an
// operand-swapped compare -- confirm against the header.
test_mm_cmpge_sd(__m128d A,__m128d B)209 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
210 // CHECK-LABEL: test_mm_cmpge_sd
211 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
212 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
213 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
214 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
215 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
216 return _mm_cmpge_sd(A, B);
217 }
218
// Exercises _mm_cmpgt_epi8; expects a signed `icmp sgt <16 x i8>`. The file is
// also run with -fno-signed-char, and the same pattern must match there.
test_mm_cmpgt_epi8(__m128i A,__m128i B)219 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
220 // CHECK-LABEL: test_mm_cmpgt_epi8
221 // CHECK: icmp sgt <16 x i8>
222 return _mm_cmpgt_epi8(A, B);
223 }
224
// Exercises _mm_cmpgt_epi16; the emitted IR is expected to contain `icmp sgt <8 x i16>`.
test_mm_cmpgt_epi16(__m128i A,__m128i B)225 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
226 // CHECK-LABEL: test_mm_cmpgt_epi16
227 // CHECK: icmp sgt <8 x i16>
228 return _mm_cmpgt_epi16(A, B);
229 }
230
// Exercises _mm_cmpgt_epi32; the emitted IR is expected to contain `icmp sgt <4 x i32>`.
test_mm_cmpgt_epi32(__m128i A,__m128i B)231 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
232 // CHECK-LABEL: test_mm_cmpgt_epi32
233 // CHECK: icmp sgt <4 x i32>
234 return _mm_cmpgt_epi32(A, B);
235 }
236
// Exercises _mm_cmpgt_pd; expects `fcmp olt` followed directly by sext,
// bitcast, and ret.
// NOTE(review): the expected predicate is `olt` rather than `ogt`; presumably
// the intrinsic compares with its operands swapped -- confirm against the header.
test_mm_cmpgt_pd(__m128d A,__m128d B)237 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
238 // CHECK-LABEL: test_mm_cmpgt_pd
239 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
240 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
241 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
242 // CHECK-NEXT: ret <2 x double> [[BC]]
243 return _mm_cmpgt_pd(A, B);
244 }
245
// Exercises _mm_cmpgt_sd; expects a cmp.sd call with immediate `i8 1`, then an
// extract/insert pair for lane 0 and another for lane 1.
// NOTE(review): the lane shuffling presumably rebuilds the result after an
// operand-swapped compare -- confirm against the header.
test_mm_cmpgt_sd(__m128d A,__m128d B)246 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
247 // CHECK-LABEL: test_mm_cmpgt_sd
248 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
249 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
250 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
251 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
252 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
253 return _mm_cmpgt_sd(A, B);
254 }
255
// Exercises _mm_cmple_pd; expects `fcmp ole` on <2 x double> followed directly
// by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmple_pd(__m128d A,__m128d B)256 __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
257 // CHECK-LABEL: test_mm_cmple_pd
258 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
259 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
260 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
261 // CHECK-NEXT: ret <2 x double> [[BC]]
262 return _mm_cmple_pd(A, B);
263 }
264
// Exercises _mm_cmple_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 2`.
test_mm_cmple_sd(__m128d A,__m128d B)265 __m128d test_mm_cmple_sd(__m128d A, __m128d B) {
266 // CHECK-LABEL: test_mm_cmple_sd
267 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
268 return _mm_cmple_sd(A, B);
269 }
270
// Exercises _mm_cmplt_epi8; expects `icmp sgt <16 x i8>`.
// NOTE(review): a greater-than predicate for a less-than intrinsic presumably
// means the operands are swapped inside the header -- confirm.
test_mm_cmplt_epi8(__m128i A,__m128i B)271 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
272 // CHECK-LABEL: test_mm_cmplt_epi8
273 // CHECK: icmp sgt <16 x i8>
274 return _mm_cmplt_epi8(A, B);
275 }
276
// Exercises _mm_cmplt_epi16; expects `icmp sgt <8 x i16>`.
// NOTE(review): a greater-than predicate for a less-than intrinsic presumably
// means the operands are swapped inside the header -- confirm.
test_mm_cmplt_epi16(__m128i A,__m128i B)277 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
278 // CHECK-LABEL: test_mm_cmplt_epi16
279 // CHECK: icmp sgt <8 x i16>
280 return _mm_cmplt_epi16(A, B);
281 }
282
// Exercises _mm_cmplt_epi32; expects `icmp sgt <4 x i32>`.
// NOTE(review): a greater-than predicate for a less-than intrinsic presumably
// means the operands are swapped inside the header -- confirm.
test_mm_cmplt_epi32(__m128i A,__m128i B)283 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
284 // CHECK-LABEL: test_mm_cmplt_epi32
285 // CHECK: icmp sgt <4 x i32>
286 return _mm_cmplt_epi32(A, B);
287 }
288
// Exercises _mm_cmplt_pd; expects `fcmp olt` on <2 x double> followed directly
// by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmplt_pd(__m128d A,__m128d B)289 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
290 // CHECK-LABEL: test_mm_cmplt_pd
291 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
292 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
293 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
294 // CHECK-NEXT: ret <2 x double> [[BC]]
295 return _mm_cmplt_pd(A, B);
296 }
297
// Exercises _mm_cmplt_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 1`.
test_mm_cmplt_sd(__m128d A,__m128d B)298 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
299 // CHECK-LABEL: test_mm_cmplt_sd
300 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
301 return _mm_cmplt_sd(A, B);
302 }
303
// Exercises _mm_cmpneq_pd; expects `fcmp une` on <2 x double> followed directly
// by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmpneq_pd(__m128d A,__m128d B)304 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
305 // CHECK-LABEL: test_mm_cmpneq_pd
306 // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
307 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
308 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
309 // CHECK-NEXT: ret <2 x double> [[BC]]
310 return _mm_cmpneq_pd(A, B);
311 }
312
// Exercises _mm_cmpneq_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 4`.
test_mm_cmpneq_sd(__m128d A,__m128d B)313 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
314 // CHECK-LABEL: test_mm_cmpneq_sd
315 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
316 return _mm_cmpneq_sd(A, B);
317 }
318
// Exercises _mm_cmpnge_pd; expects `fcmp ugt` followed directly by sext,
// bitcast, and ret.
// NOTE(review): `ugt` is the same predicate the not-less-or-equal test expects;
// presumably this intrinsic swaps its operands -- confirm against the header.
test_mm_cmpnge_pd(__m128d A,__m128d B)319 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
320 // CHECK-LABEL: test_mm_cmpnge_pd
321 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
322 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
323 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
324 // CHECK-NEXT: ret <2 x double> [[BC]]
325 return _mm_cmpnge_pd(A, B);
326 }
327
// Exercises _mm_cmpnge_sd; expects a cmp.sd call with immediate `i8 6`, then an
// extract/insert pair for lane 0 and another for lane 1.
// NOTE(review): the lane shuffling presumably rebuilds the result after an
// operand-swapped compare -- confirm against the header.
test_mm_cmpnge_sd(__m128d A,__m128d B)328 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
329 // CHECK-LABEL: test_mm_cmpnge_sd
330 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
331 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
332 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
333 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
334 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
335 return _mm_cmpnge_sd(A, B);
336 }
337
// Exercises _mm_cmpngt_pd; expects `fcmp uge` followed directly by sext,
// bitcast, and ret.
// NOTE(review): `uge` is the same predicate the not-less-than test expects;
// presumably this intrinsic swaps its operands -- confirm against the header.
test_mm_cmpngt_pd(__m128d A,__m128d B)338 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
339 // CHECK-LABEL: test_mm_cmpngt_pd
340 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
341 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
342 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
343 // CHECK-NEXT: ret <2 x double> [[BC]]
344 return _mm_cmpngt_pd(A, B);
345 }
346
// Exercises _mm_cmpngt_sd; expects a cmp.sd call with immediate `i8 5`, then an
// extract/insert pair for lane 0 and another for lane 1.
// NOTE(review): the lane shuffling presumably rebuilds the result after an
// operand-swapped compare -- confirm against the header.
test_mm_cmpngt_sd(__m128d A,__m128d B)347 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
348 // CHECK-LABEL: test_mm_cmpngt_sd
349 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
350 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
351 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
352 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
353 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
354 return _mm_cmpngt_sd(A, B);
355 }
356
// Exercises _mm_cmpnle_pd; expects `fcmp ugt` on <2 x double> followed directly
// by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmpnle_pd(__m128d A,__m128d B)357 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
358 // CHECK-LABEL: test_mm_cmpnle_pd
359 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
360 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
361 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
362 // CHECK-NEXT: ret <2 x double> [[BC]]
363 return _mm_cmpnle_pd(A, B);
364 }
365
// Exercises _mm_cmpnle_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 6`.
test_mm_cmpnle_sd(__m128d A,__m128d B)366 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
367 // CHECK-LABEL: test_mm_cmpnle_sd
368 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
369 return _mm_cmpnle_sd(A, B);
370 }
371
// Exercises _mm_cmpnlt_pd; expects `fcmp uge` on <2 x double> followed directly
// by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmpnlt_pd(__m128d A,__m128d B)372 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
373 // CHECK-LABEL: test_mm_cmpnlt_pd
374 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
375 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
376 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
377 // CHECK-NEXT: ret <2 x double> [[BC]]
378 return _mm_cmpnlt_pd(A, B);
379 }
380
// Exercises _mm_cmpnlt_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 5`.
test_mm_cmpnlt_sd(__m128d A,__m128d B)381 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
382 // CHECK-LABEL: test_mm_cmpnlt_sd
383 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
384 return _mm_cmpnlt_sd(A, B);
385 }
386
// Exercises _mm_cmpord_pd; expects `fcmp ord` on <2 x double> followed directly
// by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmpord_pd(__m128d A,__m128d B)387 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
388 // CHECK-LABEL: test_mm_cmpord_pd
389 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
390 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
391 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
392 // CHECK-NEXT: ret <2 x double> [[BC]]
393 return _mm_cmpord_pd(A, B);
394 }
395
// Exercises _mm_cmpord_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 7`.
test_mm_cmpord_sd(__m128d A,__m128d B)396 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
397 // CHECK-LABEL: test_mm_cmpord_sd
398 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
399 return _mm_cmpord_sd(A, B);
400 }
401
// Exercises _mm_cmpunord_pd; expects `fcmp uno` on <2 x double> followed
// directly by sext to <2 x i64>, bitcast to <2 x double>, and ret.
test_mm_cmpunord_pd(__m128d A,__m128d B)402 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
403 // CHECK-LABEL: test_mm_cmpunord_pd
404 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
405 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
406 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
407 // CHECK-NEXT: ret <2 x double> [[BC]]
408 return _mm_cmpunord_pd(A, B);
409 }
410
// Exercises _mm_cmpunord_sd; expects a call to @llvm.x86.sse2.cmp.sd with the
// immediate operand `i8 3`.
test_mm_cmpunord_sd(__m128d A,__m128d B)411 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
412 // CHECK-LABEL: test_mm_cmpunord_sd
413 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
414 return _mm_cmpunord_sd(A, B);
415 }
416
// Exercises _mm_comieq_sd; expects an i32-returning call to @llvm.x86.sse2.comieq.sd.
test_mm_comieq_sd(__m128d A,__m128d B)417 int test_mm_comieq_sd(__m128d A, __m128d B) {
418 // CHECK-LABEL: test_mm_comieq_sd
419 // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
420 return _mm_comieq_sd(A, B);
421 }
422
// Exercises _mm_comige_sd; expects an i32-returning call to @llvm.x86.sse2.comige.sd.
test_mm_comige_sd(__m128d A,__m128d B)423 int test_mm_comige_sd(__m128d A, __m128d B) {
424 // CHECK-LABEL: test_mm_comige_sd
425 // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
426 return _mm_comige_sd(A, B);
427 }
428
// Exercises _mm_comigt_sd; expects an i32-returning call to @llvm.x86.sse2.comigt.sd.
test_mm_comigt_sd(__m128d A,__m128d B)429 int test_mm_comigt_sd(__m128d A, __m128d B) {
430 // CHECK-LABEL: test_mm_comigt_sd
431 // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
432 return _mm_comigt_sd(A, B);
433 }
434
// Exercises _mm_comile_sd; expects an i32-returning call to @llvm.x86.sse2.comile.sd.
test_mm_comile_sd(__m128d A,__m128d B)435 int test_mm_comile_sd(__m128d A, __m128d B) {
436 // CHECK-LABEL: test_mm_comile_sd
437 // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
438 return _mm_comile_sd(A, B);
439 }
440
// Exercises _mm_comilt_sd; expects an i32-returning call to @llvm.x86.sse2.comilt.sd.
test_mm_comilt_sd(__m128d A,__m128d B)441 int test_mm_comilt_sd(__m128d A, __m128d B) {
442 // CHECK-LABEL: test_mm_comilt_sd
443 // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
444 return _mm_comilt_sd(A, B);
445 }
446
// Exercises _mm_comineq_sd; expects an i32-returning call to @llvm.x86.sse2.comineq.sd.
test_mm_comineq_sd(__m128d A,__m128d B)447 int test_mm_comineq_sd(__m128d A, __m128d B) {
448 // CHECK-LABEL: test_mm_comineq_sd
449 // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
450 return _mm_comineq_sd(A, B);
451 }
452
// Exercises _mm_cvtepi32_pd; expects a shufflevector selecting elements 0 and 1
// of a <4 x i32>, then `sitofp` from <2 x i32> to <2 x double>.
test_mm_cvtepi32_pd(__m128i A)453 __m128d test_mm_cvtepi32_pd(__m128i A) {
454 // CHECK-LABEL: test_mm_cvtepi32_pd
455 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
456 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
457 return _mm_cvtepi32_pd(A);
458 }
459
// Exercises _mm_cvtepi32_ps; expects `sitofp` from <4 x i32> to <4 x float>.
test_mm_cvtepi32_ps(__m128i A)460 __m128 test_mm_cvtepi32_ps(__m128i A) {
461 // CHECK-LABEL: test_mm_cvtepi32_ps
462 // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
463 return _mm_cvtepi32_ps(A);
464 }
465
// Exercises _mm_cvtpd_epi32; expects a call to @llvm.x86.sse2.cvtpd2dq
// returning <4 x i32>.
test_mm_cvtpd_epi32(__m128d A)466 __m128i test_mm_cvtpd_epi32(__m128d A) {
467 // CHECK-LABEL: test_mm_cvtpd_epi32
468 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
469 return _mm_cvtpd_epi32(A);
470 }
471
// Exercises _mm_cvtpd_ps; expects a call to @llvm.x86.sse2.cvtpd2ps
// returning <4 x float>.
test_mm_cvtpd_ps(__m128d A)472 __m128 test_mm_cvtpd_ps(__m128d A) {
473 // CHECK-LABEL: test_mm_cvtpd_ps
474 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
475 return _mm_cvtpd_ps(A);
476 }
477
// Exercises _mm_cvtps_epi32; expects a call to @llvm.x86.sse2.cvtps2dq
// returning <4 x i32>.
test_mm_cvtps_epi32(__m128 A)478 __m128i test_mm_cvtps_epi32(__m128 A) {
479 // CHECK-LABEL: test_mm_cvtps_epi32
480 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
481 return _mm_cvtps_epi32(A);
482 }
483
// Exercises _mm_cvtps_pd; expects a shufflevector selecting elements 0 and 1
// of a <4 x float>, then `fpext` from <2 x float> to <2 x double>.
test_mm_cvtps_pd(__m128 A)484 __m128d test_mm_cvtps_pd(__m128 A) {
485 // CHECK-LABEL: test_mm_cvtps_pd
486 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
487 // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
488 return _mm_cvtps_pd(A);
489 }
490
// Exercises _mm_cvtsd_f64; expects an extractelement of lane 0 from a <2 x double>.
test_mm_cvtsd_f64(__m128d A)491 double test_mm_cvtsd_f64(__m128d A) {
492 // CHECK-LABEL: test_mm_cvtsd_f64
493 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
494 return _mm_cvtsd_f64(A);
495 }
496
// Exercises _mm_cvtsd_si32; expects an i32-returning call to @llvm.x86.sse2.cvtsd2si.
test_mm_cvtsd_si32(__m128d A)497 int test_mm_cvtsd_si32(__m128d A) {
498 // CHECK-LABEL: test_mm_cvtsd_si32
499 // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
500 return _mm_cvtsd_si32(A);
501 }
502
503 #ifdef __x86_64__
// Exercises _mm_cvtsd_si64 (compiled only when __x86_64__ is defined); expects
// an i64-returning call to @llvm.x86.sse2.cvtsd2si64.
test_mm_cvtsd_si64(__m128d A)504 long long test_mm_cvtsd_si64(__m128d A) {
505 // CHECK-LABEL: test_mm_cvtsd_si64
506 // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
507 return _mm_cvtsd_si64(A);
508 }
509 #endif
510
// Exercises _mm_cvtsd_ss; expects a call to @llvm.x86.sse2.cvtsd2ss taking a
// <4 x float> and a <2 x double>.
test_mm_cvtsd_ss(__m128 A,__m128d B)511 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
512 // CHECK-LABEL: test_mm_cvtsd_ss
513 // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
514 return _mm_cvtsd_ss(A, B);
515 }
516
// Exercises _mm_cvtsi128_si32; expects an extractelement of lane 0 from a <4 x i32>.
test_mm_cvtsi128_si32(__m128i A)517 int test_mm_cvtsi128_si32(__m128i A) {
518 // CHECK-LABEL: test_mm_cvtsi128_si32
519 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
520 return _mm_cvtsi128_si32(A);
521 }
522
523 #ifdef __x86_64__
// Exercises _mm_cvtsi128_si64 (compiled only when __x86_64__ is defined);
// expects an extractelement of lane 0 from a <2 x i64>.
test_mm_cvtsi128_si64(__m128i A)524 long long test_mm_cvtsi128_si64(__m128i A) {
525 // CHECK-LABEL: test_mm_cvtsi128_si64
526 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
527 return _mm_cvtsi128_si64(A);
528 }
529 #endif
530
// Exercises _mm_cvtsi32_sd; expects `sitofp i32 ... to double` and an
// insertelement placing that double in lane 0 of a <2 x double>.
test_mm_cvtsi32_sd(__m128d A,int B)531 __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
532 // CHECK-LABEL: test_mm_cvtsi32_sd
533 // CHECK: sitofp i32 %{{.*}} to double
534 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
535 return _mm_cvtsi32_sd(A, B);
536 }
537
// Exercises _mm_cvtsi32_si128; expects the scalar inserted into lane 0 of an
// undef <4 x i32>, then constant zero inserted into lanes 1, 2, and 3.
test_mm_cvtsi32_si128(int A)538 __m128i test_mm_cvtsi32_si128(int A) {
539 // CHECK-LABEL: test_mm_cvtsi32_si128
540 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
541 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
542 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
543 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
544 return _mm_cvtsi32_si128(A);
545 }
546
547 #ifdef __x86_64__
// Exercises _mm_cvtsi64_sd (compiled only when __x86_64__ is defined); expects
// `sitofp i64 ... to double` and an insertelement into lane 0 of a <2 x double>.
test_mm_cvtsi64_sd(__m128d A,long long B)548 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
549 // CHECK-LABEL: test_mm_cvtsi64_sd
550 // CHECK: sitofp i64 %{{.*}} to double
551 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
552 return _mm_cvtsi64_sd(A, B);
553 }
554
// Exercises _mm_cvtsi64_si128 (compiled only when __x86_64__ is defined);
// expects the scalar inserted into lane 0 of an undef <2 x i64> and constant
// zero inserted into lane 1.
test_mm_cvtsi64_si128(long long A)555 __m128i test_mm_cvtsi64_si128(long long A) {
556 // CHECK-LABEL: test_mm_cvtsi64_si128
557 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
558 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
559 return _mm_cvtsi64_si128(A);
560 }
561 #endif
562
// Exercises _mm_cvtss_sd; expects lane 0 extracted from a <4 x float>, an
// `fpext float ... to double`, and an insertelement into lane 0 of a <2 x double>.
test_mm_cvtss_sd(__m128d A,__m128 B)563 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
564 // CHECK-LABEL: test_mm_cvtss_sd
565 // CHECK: extractelement <4 x float> %{{.*}}, i32 0
566 // CHECK: fpext float %{{.*}} to double
567 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
568 return _mm_cvtss_sd(A, B);
569 }
570
// Exercises _mm_cvttpd_epi32; expects a call to @llvm.x86.sse2.cvttpd2dq
// returning <4 x i32>.
test_mm_cvttpd_epi32(__m128d A)571 __m128i test_mm_cvttpd_epi32(__m128d A) {
572 // CHECK-LABEL: test_mm_cvttpd_epi32
573 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
574 return _mm_cvttpd_epi32(A);
575 }
576
// Exercises _mm_cvttps_epi32; expects a call to @llvm.x86.sse2.cvttps2dq
// returning <4 x i32>.
test_mm_cvttps_epi32(__m128 A)577 __m128i test_mm_cvttps_epi32(__m128 A) {
578 // CHECK-LABEL: test_mm_cvttps_epi32
579 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
580 return _mm_cvttps_epi32(A);
581 }
582
// Exercises _mm_cvttsd_si32; expects an i32-returning call to @llvm.x86.sse2.cvttsd2si.
test_mm_cvttsd_si32(__m128d A)583 int test_mm_cvttsd_si32(__m128d A) {
584 // CHECK-LABEL: test_mm_cvttsd_si32
585 // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
586 return _mm_cvttsd_si32(A);
587 }
588
589 #ifdef __x86_64__
// Exercises _mm_cvttsd_si64 (compiled only when __x86_64__ is defined); expects
// an i64-returning call to @llvm.x86.sse2.cvttsd2si64.
test_mm_cvttsd_si64(__m128d A)590 long long test_mm_cvttsd_si64(__m128d A) {
591 // CHECK-LABEL: test_mm_cvttsd_si64
592 // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
593 return _mm_cvttsd_si64(A);
594 }
595 #endif
596
// Exercises _mm_div_pd; the emitted IR is expected to contain `fdiv <2 x double>`.
test_mm_div_pd(__m128d A,__m128d B)597 __m128d test_mm_div_pd(__m128d A, __m128d B) {
598 // CHECK-LABEL: test_mm_div_pd
599 // CHECK: fdiv <2 x double>
600 return _mm_div_pd(A, B);
601 }
602
// Exercises _mm_div_sd; expects two lane-0 extractelements, a scalar
// `fdiv double`, and an insertelement that writes a double into lane 0.
test_mm_div_sd(__m128d A,__m128d B)603 __m128d test_mm_div_sd(__m128d A, __m128d B) {
604 // CHECK-LABEL: test_mm_div_sd
605 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
606 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
607 // CHECK: fdiv double
608 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
609 return _mm_div_sd(A, B);
610 }
611
612 // Lowering to pextrw requires optimization.
// Exercises _mm_extract_epi16 with index 1; expects an extractelement from
// <8 x i16> at index 1 (the pattern accepts an i32 or i64 index type) and a
// zext of the i16 result to i32.
test_mm_extract_epi16(__m128i A)613 int test_mm_extract_epi16(__m128i A) {
614 // CHECK-LABEL: test_mm_extract_epi16
615 // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
616 // CHECK: zext i16 %{{.*}} to i32
617 return _mm_extract_epi16(A, 1);
618 }
619
// Exercises _mm_insert_epi16 with index 0; expects an insertelement into
// <8 x i16> at index 0 (the pattern accepts an i32 or i64 index type).
test_mm_insert_epi16(__m128i A,int B)620 __m128i test_mm_insert_epi16(__m128i A, int B) {
621 // CHECK-LABEL: test_mm_insert_epi16
622 // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
623 return _mm_insert_epi16(A, B, 0);
624 }
625
// Exercises _mm_lfence; expects a call to @llvm.x86.sse2.lfence with no
// arguments. Declared with an explicit (void) parameter list so the
// definition is a real prototype rather than an old-style declarator.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}
631
// Exercises _mm_load_pd; expects a <2 x double> load with 16-byte alignment.
test_mm_load_pd(double const * A)632 __m128d test_mm_load_pd(double const* A) {
633 // CHECK-LABEL: test_mm_load_pd
634 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
635 return _mm_load_pd(A);
636 }
637
// Exercises _mm_load_pd1; expects a scalar double load with alignment 8,
// inserted into lane 0 of an undef <2 x double> and then into lane 1.
test_mm_load_pd1(double const * A)638 __m128d test_mm_load_pd1(double const* A) {
639 // CHECK-LABEL: test_mm_load_pd1
640 // CHECK: load double, double* %{{.*}}, align 8
641 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
642 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
643 return _mm_load_pd1(A);
644 }
645
// Exercises _mm_load_sd; expects a scalar double load with alignment exactly 1.
// The trailing `{{$}}` anchors the pattern at end of line, so a larger
// alignment such as `align 16` cannot match.
test_mm_load_sd(double const * A)646 __m128d test_mm_load_sd(double const* A) {
647 // CHECK-LABEL: test_mm_load_sd
648 // CHECK: load double, double* %{{.*}}, align 1{{$}}
649 return _mm_load_sd(A);
650 }
651
// Exercises _mm_load_si128; expects a <2 x i64> load with 16-byte alignment.
test_mm_load_si128(__m128i const * A)652 __m128i test_mm_load_si128(__m128i const* A) {
653 // CHECK-LABEL: test_mm_load_si128
654 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
655 return _mm_load_si128(A);
656 }
657
// Exercises _mm_load1_pd; expects a scalar double load with alignment 8,
// inserted into lane 0 of an undef <2 x double> and then into lane 1.
test_mm_load1_pd(double const * A)658 __m128d test_mm_load1_pd(double const* A) {
659 // CHECK-LABEL: test_mm_load1_pd
660 // CHECK: load double, double* %{{.*}}, align 8
661 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
662 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
663 return _mm_load1_pd(A);
664 }
665
// Exercises _mm_loadh_pd with a void* source; expects a scalar double load with
// alignment exactly 1 and an insertelement into lane 1 of a <2 x double>.
test_mm_loadh_pd(__m128d x,void * y)666 __m128d test_mm_loadh_pd(__m128d x, void* y) {
667 // CHECK-LABEL: test_mm_loadh_pd
668 // CHECK: load double, double* %{{.*}}, align 1{{$}}
669 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
670 return _mm_loadh_pd(x, y);
671 }
672
// Exercises _mm_loadl_epi64; expects an i64 load with alignment exactly 1, the
// loaded value inserted into lane 0 of an undef <2 x i64>, and constant zero
// inserted into lane 1. The function name is matched with a label directive,
// like every other test in this file, so the patterns below can only match
// inside this function's IR.
__m128i test_mm_loadl_epi64(__m128i* y) {
  // CHECK-LABEL: test_mm_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
  // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
  return _mm_loadl_epi64(y);
}
680
// Exercises _mm_loadl_pd with a void* source; expects a scalar double load with
// alignment exactly 1 inserted into lane 0 of an undef <2 x double>, then
// lane 1 extracted from a <2 x double> and inserted into lane 1.
test_mm_loadl_pd(__m128d x,void * y)681 __m128d test_mm_loadl_pd(__m128d x, void* y) {
682 // CHECK-LABEL: test_mm_loadl_pd
683 // CHECK: load double, double* %{{.*}}, align 1{{$}}
684 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
685 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
686 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
687 return _mm_loadl_pd(x, y);
688 }
689
// Exercises _mm_loadr_pd; expects a 16-byte-aligned <2 x double> load followed
// by a shufflevector with mask <1, 0>.
test_mm_loadr_pd(double const * A)690 __m128d test_mm_loadr_pd(double const* A) {
691 // CHECK-LABEL: test_mm_loadr_pd
692 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
693 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
694 return _mm_loadr_pd(A);
695 }
696
// Exercises _mm_loadu_pd; expects a <2 x double> load with alignment exactly 1
// (the `{{$}}` end-of-line anchor rules out larger alignments).
test_mm_loadu_pd(double const * A)697 __m128d test_mm_loadu_pd(double const* A) {
698 // CHECK-LABEL: test_mm_loadu_pd
699 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
700 return _mm_loadu_pd(A);
701 }
702
// Exercises _mm_loadu_si128; expects a <2 x i64> load with alignment exactly 1
// (the `{{$}}` end-of-line anchor rules out larger alignments).
test_mm_loadu_si128(__m128i const * A)703 __m128i test_mm_loadu_si128(__m128i const* A) {
704 // CHECK-LABEL: test_mm_loadu_si128
705 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
706 return _mm_loadu_si128(A);
707 }
708
// Exercises _mm_loadu_si64; expects an i64 load with alignment exactly 1,
// inserted into lane 0 of an undef <2 x i64>, with zero inserted into lane 1.
test_mm_loadu_si64(void const * A)709 __m128i test_mm_loadu_si64(void const* A) {
710 // CHECK-LABEL: test_mm_loadu_si64
711 // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
712 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
713 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
714 return _mm_loadu_si64(A);
715 }
716
// Exercises _mm_loadu_si32; expects an i32 load with alignment exactly 1,
// inserted into lane 0 of an undef <4 x i32>, with zero inserted into lanes 1-3.
test_mm_loadu_si32(void const * A)717 __m128i test_mm_loadu_si32(void const* A) {
718 // CHECK-LABEL: test_mm_loadu_si32
719 // CHECK: load i32, i32* %{{.*}}, align 1{{$}}
720 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
721 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
722 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
723 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
724 return _mm_loadu_si32(A);
725 }
726
// Exercises _mm_loadu_si16; expects an i16 load with alignment exactly 1,
// inserted into lane 0 of an undef <8 x i16>, with zero inserted into lanes 1-7.
test_mm_loadu_si16(void const * A)727 __m128i test_mm_loadu_si16(void const* A) {
728 // CHECK-LABEL: test_mm_loadu_si16
729 // CHECK: load i16, i16* %{{.*}}, align 1{{$}}
730 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
731 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
732 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
733 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
734 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
735 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
736 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
737 // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
738 return _mm_loadu_si16(A);
739 }
740
// Exercises _mm_madd_epi16; expects a call to @llvm.x86.sse2.pmadd.wd taking
// two <8 x i16> operands and returning <4 x i32>.
test_mm_madd_epi16(__m128i A,__m128i B)741 __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
742 // CHECK-LABEL: test_mm_madd_epi16
743 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
744 return _mm_madd_epi16(A, B);
745 }
746
// Exercises _mm_maskmoveu_si128; expects a call to @llvm.x86.sse2.maskmov.dqu
// taking two <16 x i8> operands and an i8* destination.
test_mm_maskmoveu_si128(__m128i A,__m128i B,char * C)747 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
748 // CHECK-LABEL: test_mm_maskmoveu_si128
749 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
750 _mm_maskmoveu_si128(A, B, C);
751 }
752
// Exercises _mm_max_epi16; expects a call to the generic @llvm.smax.v8i16 intrinsic.
test_mm_max_epi16(__m128i A,__m128i B)753 __m128i test_mm_max_epi16(__m128i A, __m128i B) {
754 // CHECK-LABEL: test_mm_max_epi16
755 // CHECK: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
756 return _mm_max_epi16(A, B);
757 }
758
// Exercises _mm_max_epu8; expects a call to the generic @llvm.umax.v16i8 intrinsic.
test_mm_max_epu8(__m128i A,__m128i B)759 __m128i test_mm_max_epu8(__m128i A, __m128i B) {
760 // CHECK-LABEL: test_mm_max_epu8
761 // CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
762 return _mm_max_epu8(A, B);
763 }
764
// Exercises _mm_max_pd; expects a call to the target intrinsic @llvm.x86.sse2.max.pd.
test_mm_max_pd(__m128d A,__m128d B)765 __m128d test_mm_max_pd(__m128d A, __m128d B) {
766 // CHECK-LABEL: test_mm_max_pd
767 // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
768 return _mm_max_pd(A, B);
769 }
770
// _mm_max_sd: expects a call to the target intrinsic llvm.x86.sse2.max.sd.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}
776
// _mm_mfence: expects a call to the llvm.x86.sse2.mfence intrinsic.
// The parameter list is spelled (void) so this definition carries a real
// prototype rather than an unprototyped (pre-C23) declarator.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}
782
// _mm_min_epi16: expects the generic signed-min intrinsic llvm.smin.v8i16.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_min_epi16(A, B);
}
788
// _mm_min_epu8: expects the generic unsigned-min intrinsic llvm.umin.v16i8.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_min_epu8(A, B);
}
794
// _mm_min_pd: expects a call to the target intrinsic llvm.x86.sse2.min.pd.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}
800
// _mm_min_sd: expects a call to the target intrinsic llvm.x86.sse2.min.sd.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
806
// _mm_movepi64_pi64: expects lane 0 of the <2 x i64> input to be extracted
// and bitcast to <1 x i64>. The extract operand is matched with a wildcard
// rather than a fixed SSA name so the test does not depend on how the
// compiler happens to number its temporaries.
__m64 test_mm_movepi64_pi64(__m128i A) {
  // CHECK-LABEL: test_mm_movepi64_pi64
  // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: bitcast i64 [[EXT]] to <1 x i64>
  return _mm_movepi64_pi64(A);
}
814
// _mm_movpi64_epi64: expects the <1 x i64> argument to be bitcast to i64,
// inserted into lane 0 of a <2 x i64>, and lane 1 set to constant zero.
__m128i test_mm_movpi64_epi64(__m64 A) {
  // CHECK-LABEL: test_mm_movpi64_epi64
  // CHECK: [[CAST:%.*]] = bitcast <1 x i64> %{{.*}} to i64
  // CHECK: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[CAST]], i32 0
  // CHECK: insertelement <2 x i64> [[INS]], i64 0, i32 1
  return _mm_movpi64_epi64(A);
}
823
// _mm_move_epi64: expects a two-input <2 x i64> shufflevector with
// mask <0, 2>.
__m128i test_mm_move_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_move_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_move_epi64(A);
}
829
// _mm_move_sd: expects lane 0 to be extracted from one <2 x double> and
// inserted into lane 0 of another.
__m128d test_mm_move_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_move_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_move_sd(A, B);
}
836
// _mm_movemask_epi8: expects a call to llvm.x86.sse2.pmovmskb.128 that
// returns i32.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
842
// _mm_movemask_pd: expects a call to llvm.x86.sse2.movmsk.pd that
// returns i32.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
848
// _mm_mul_epu32: expects both operands to be masked to their low 32 bits
// (and with 4294967295 per lane) followed by a plain <2 x i64> multiply.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_epu32(A, B);
}
856
// _mm_mul_pd: expects a plain <2 x double> fmul instruction.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
862
// _mm_mul_sd: expects lane 0 of each operand to be extracted, multiplied as
// scalar doubles, and the product inserted back into lane 0.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fmul double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_mul_sd(A, B);
}
871
// _mm_mulhi_epi16: expects a call to the llvm.x86.sse2.pmulh.w intrinsic.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
877
// _mm_mulhi_epu16: expects a call to the llvm.x86.sse2.pmulhu.w intrinsic.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
883
// _mm_mullo_epi16: expects a plain <8 x i16> mul instruction.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
889
// _mm_or_pd: expects the bitwise OR to be performed on <2 x i64> values.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
895
// _mm_or_si128: expects a plain <2 x i64> or instruction.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
901
// _mm_packs_epi16: expects a call to llvm.x86.sse2.packsswb.128, taking two
// <8 x i16> operands and yielding <16 x i8>.
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
907
// _mm_packs_epi32: expects a call to llvm.x86.sse2.packssdw.128, taking two
// <4 x i32> operands and yielding <8 x i16>.
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
913
// _mm_packus_epi16: expects a call to llvm.x86.sse2.packuswb.128, taking two
// <8 x i16> operands and yielding <16 x i8>.
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
919
// _mm_pause: expects a call to the llvm.x86.sse2.pause intrinsic.
// The intrinsic is invoked as a plain statement: ISO C does not allow a
// return statement with an expression in a function returning void. The
// parameter list is spelled (void) so the definition carries a prototype.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}
925
// _mm_sad_epu8: expects a call to llvm.x86.sse2.psad.bw, taking two
// <16 x i8> operands and yielding <2 x i64>.
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
931
// _mm_set_epi8: expects a chain of sixteen insertelement instructions that
// fills lanes 0 through 15 of a <16 x i8>, starting from undef.
__m128i test_mm_set_epi8(char A, char B, char C, char D, char E, char F,
                         char G, char H, char I, char J, char K, char L,
                         char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_set_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
955
// _mm_set_epi16: expects a chain of eight insertelement instructions that
// fills lanes 0 through 7 of an <8 x i16>, starting from undef.
__m128i test_mm_set_epi16(short A, short B, short C, short D, short E,
                          short F, short G, short H) {
  // CHECK-LABEL: test_mm_set_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set_epi16(A, B, C, D, E, F, G, H);
}
969
// _mm_set_epi32: expects four insertelement instructions filling lanes 0
// through 3 of a <4 x i32>, starting from undef.
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_set_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set_epi32(A, B, C, D);
}
978
// _mm_set_epi64 (with __m64 arguments): expects two insertelement
// instructions filling lanes 0 and 1 of a <2 x i64>, starting from undef.
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_set_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64(A, B);
}
985
// _mm_set_epi64x (with long long arguments): expects two insertelement
// instructions filling lanes 0 and 1 of a <2 x i64>, starting from undef.
__m128i test_mm_set_epi64x(long long A, long long B) {
  // CHECK-LABEL: test_mm_set_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64x(A, B);
}
992
// _mm_set_pd: expects two insertelement instructions filling lanes 0 and 1
// of a <2 x double>, starting from undef.
__m128d test_mm_set_pd(double A, double B) {
  // CHECK-LABEL: test_mm_set_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd(A, B);
}
999
// _mm_set_pd1: expects two insertelement instructions filling lanes 0 and 1
// of a <2 x double>, starting from undef.
__m128d test_mm_set_pd1(double A) {
  // CHECK-LABEL: test_mm_set_pd1
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd1(A);
}
1006
// _mm_set_sd: expects the argument to be inserted into lane 0 and constant
// 0.0 into lane 1 of a <2 x double>.
__m128d test_mm_set_sd(double A) {
  // CHECK-LABEL: test_mm_set_sd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
  return _mm_set_sd(A);
}
1013
// _mm_set1_epi8: expects a chain of sixteen insertelement instructions that
// fills lanes 0 through 15 of a <16 x i8>, starting from undef.
__m128i test_mm_set1_epi8(char A) {
  // CHECK-LABEL: test_mm_set1_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set1_epi8(A);
}
1034
// _mm_set1_epi16: expects a chain of eight insertelement instructions that
// fills lanes 0 through 7 of an <8 x i16>, starting from undef.
__m128i test_mm_set1_epi16(short A) {
  // CHECK-LABEL: test_mm_set1_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set1_epi16(A);
}
1047
// _mm_set1_epi32: expects four insertelement instructions filling lanes 0
// through 3 of a <4 x i32>, starting from undef.
__m128i test_mm_set1_epi32(int A) {
  // CHECK-LABEL: test_mm_set1_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set1_epi32(A);
}
1056
// _mm_set1_epi64 (with an __m64 argument): expects two insertelement
// instructions filling lanes 0 and 1 of a <2 x i64>, starting from undef.
__m128i test_mm_set1_epi64(__m64 A) {
  // CHECK-LABEL: test_mm_set1_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64(A);
}
1063
// _mm_set1_epi64x (with a long long argument): expects two insertelement
// instructions filling lanes 0 and 1 of a <2 x i64>, starting from undef.
__m128i test_mm_set1_epi64x(long long A) {
  // CHECK-LABEL: test_mm_set1_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64x(A);
}
1070
// _mm_set1_pd: expects two insertelement instructions filling lanes 0 and 1
// of a <2 x double>, starting from undef.
__m128d test_mm_set1_pd(double A) {
  // CHECK-LABEL: test_mm_set1_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set1_pd(A);
}
1077
// _mm_setr_epi8: expects a chain of sixteen insertelement instructions that
// fills lanes 0 through 15 of a <16 x i8>, starting from undef.
__m128i test_mm_setr_epi8(char A, char B, char C, char D, char E, char F,
                          char G, char H, char I, char J, char K, char L,
                          char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_setr_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
1101
// _mm_setr_epi16: expects a chain of eight insertelement instructions that
// fills lanes 0 through 7 of an <8 x i16>, starting from undef.
__m128i test_mm_setr_epi16(short A, short B, short C, short D, short E,
                           short F, short G, short H) {
  // CHECK-LABEL: test_mm_setr_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_setr_epi16(A, B, C, D, E, F, G, H);
}
1115
// _mm_setr_epi32: expects four insertelement instructions filling lanes 0
// through 3 of a <4 x i32>, starting from undef.
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_setr_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_setr_epi32(A, B, C, D);
}
1124
// _mm_setr_epi64 (with __m64 arguments): expects two insertelement
// instructions filling lanes 0 and 1 of a <2 x i64>, starting from undef.
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_setr_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_setr_epi64(A, B);
}
1131
// _mm_setr_pd: expects two insertelement instructions filling lanes 0 and 1
// of a <2 x double>, starting from undef.
__m128d test_mm_setr_pd(double A, double B) {
  // CHECK-LABEL: test_mm_setr_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_setr_pd(A, B);
}
1138
// _mm_setzero_pd: expects a store of a zeroinitializer <2 x double>.
// The parameter list is spelled (void) so this definition carries a real
// prototype rather than an unprototyped (pre-C23) declarator.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}
1144
// _mm_setzero_si128: expects a store of a zeroinitializer <2 x i64>.
// The parameter list is spelled (void) so this definition carries a real
// prototype rather than an unprototyped (pre-C23) declarator.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}
1150
// _mm_shuffle_epi32 with immediate 0: expects a <4 x i32> shufflevector
// against poison with an all-zero mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
1156
// _mm_shuffle_pd with immediate 1: expects a two-input <2 x double>
// shufflevector with mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
1162
// _mm_shufflehi_epi16 with immediate 0: expects an <8 x i16> shufflevector
// that keeps lanes 0-3 in place and selects lane 4 for each of lanes 4-7.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
1168
// _mm_shufflelo_epi16 with immediate 0: expects an <8 x i16> shufflevector
// that selects lane 0 for each of lanes 0-3 and keeps lanes 4-7 in place.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
1174
// _mm_sll_epi16 (vector shift count): expects a call to
// llvm.x86.sse2.psll.w.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}
1180
// _mm_sll_epi32 (vector shift count): expects a call to
// llvm.x86.sse2.psll.d.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}
1186
// _mm_sll_epi64 (vector shift count): expects a call to
// llvm.x86.sse2.psll.q.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}
1192
// _mm_slli_epi16 with constant count 1: expects a call to
// llvm.x86.sse2.pslli.w with an i32 count operand.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}
1198
// _mm_slli_epi16 with constant count -1: expects the same
// llvm.x86.sse2.pslli.w call with an i32 count operand.
__m128i test_mm_slli_epi16_1(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16_1
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, -1);
}
1204
// _mm_slli_epi16 with a run-time count B: expects a call to
// llvm.x86.sse2.pslli.w with an i32 count operand.
__m128i test_mm_slli_epi16_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_slli_epi16_2
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, B);
}
1210
// _mm_slli_epi32 with constant count 1: expects a call to
// llvm.x86.sse2.pslli.d with an i32 count operand.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}
1216
// _mm_slli_epi32 with constant count -1: expects the same
// llvm.x86.sse2.pslli.d call with an i32 count operand.
__m128i test_mm_slli_epi32_1(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32_1
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, -1);
}
1222
// _mm_slli_epi32 with a run-time count B: expects a call to
// llvm.x86.sse2.pslli.d with an i32 count operand.
__m128i test_mm_slli_epi32_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_slli_epi32_2
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, B);
}
1228
// _mm_slli_epi64 with constant count 1: expects a call to
// llvm.x86.sse2.pslli.q with an i32 count operand.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}
1234
// _mm_slli_epi64 with constant count -1: expects the same
// llvm.x86.sse2.pslli.q call with an i32 count operand.
__m128i test_mm_slli_epi64_1(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64_1
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, -1);
}
1240
// _mm_slli_epi64 with a run-time count B: expects a call to
// llvm.x86.sse2.pslli.q with an i32 count operand.
__m128i test_mm_slli_epi64_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_slli_epi64_2
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, B);
}
1246
// _mm_slli_si128 by 5 bytes: expects a <16 x i8> shufflevector of a zero
// vector with the input, using indices 11 through 26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
1252
// _mm_slli_si128 by 17 bytes: the function is expected to return a constant
// zeroinitializer <2 x i64> directly.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_slli_si128(A, 17);
}
1258
// _mm_sqrt_pd: expects the generic llvm.sqrt.v2f64 intrinsic.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
1264
// _mm_sqrt_sd: expects lane 0 to be extracted, passed to the scalar
// llvm.sqrt.f64 intrinsic, and the result inserted back into lane 0. The
// lane indices in this sequence are i64, unlike most other tests here.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.sqrt.f64(double {{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_sqrt_sd(A, B);
}
1272
// _mm_sra_epi16 (vector shift count): expects a call to
// llvm.x86.sse2.psra.w.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}
1278
// _mm_sra_epi32 (vector shift count): expects a call to
// llvm.x86.sse2.psra.d.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}
1284
// _mm_srai_epi16 with constant count 1: expects a call to
// llvm.x86.sse2.psrai.w with an i32 count operand.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}
1290
// _mm_srai_epi16 with constant count -1: expects the same
// llvm.x86.sse2.psrai.w call with an i32 count operand.
__m128i test_mm_srai_epi16_1(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16_1
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, -1);
}
1296
// _mm_srai_epi16 with a run-time count B: expects a call to
// llvm.x86.sse2.psrai.w with an i32 count operand.
__m128i test_mm_srai_epi16_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srai_epi16_2
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, B);
}
1302
// _mm_srai_epi32 with constant count 1: expects a call to
// llvm.x86.sse2.psrai.d with an i32 count operand.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}
1308
// _mm_srai_epi32 with constant count -1: expects the same
// llvm.x86.sse2.psrai.d call with an i32 count operand.
__m128i test_mm_srai_epi32_1(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32_1
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, -1);
}
1314
// _mm_srai_epi32 with a run-time count B: expects a call to
// llvm.x86.sse2.psrai.d with an i32 count operand.
__m128i test_mm_srai_epi32_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srai_epi32_2
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, B);
}
1320
// _mm_srl_epi16 (vector shift count): expects a call to
// llvm.x86.sse2.psrl.w.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}
1326
// _mm_srl_epi32 (vector shift count): expects a call to
// llvm.x86.sse2.psrl.d.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}
1332
// _mm_srl_epi64 (vector shift count): expects a call to
// llvm.x86.sse2.psrl.q.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}
1338
// _mm_srli_epi16 with constant count 1: expects a call to
// llvm.x86.sse2.psrli.w with an i32 count operand.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}
1344
// _mm_srli_epi16 with constant count -1: expects the same
// llvm.x86.sse2.psrli.w call with an i32 count operand.
__m128i test_mm_srli_epi16_1(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16_1
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, -1);
}
1350
// _mm_srli_epi16 with a run-time count B: expects a call to
// llvm.x86.sse2.psrli.w with an i32 count operand.
// The label directive names this function in full (with the _2 suffix), as
// the sibling *_2 tests do, so the match is anchored to this definition by
// its exact name instead of a substring of it.
__m128i test_mm_srli_epi16_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srli_epi16_2
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, B);
}
1356
// _mm_srli_epi32 with constant count 1: expects a call to
// llvm.x86.sse2.psrli.d with an i32 count operand.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}
1362
// _mm_srli_epi32 with constant count -1: expects the same
// llvm.x86.sse2.psrli.d call with an i32 count operand.
__m128i test_mm_srli_epi32_1(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32_1
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, -1);
}
1368
// _mm_srli_epi32 with a run-time count B: expects a call to
// llvm.x86.sse2.psrli.d with an i32 count operand.
__m128i test_mm_srli_epi32_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srli_epi32_2
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, B);
}
1374
// _mm_srli_epi64 with constant count 1: expects a call to
// llvm.x86.sse2.psrli.q with an i32 count operand.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}
1380
// _mm_srli_epi64 with constant count -1: expects the same
// llvm.x86.sse2.psrli.q call with an i32 count operand.
__m128i test_mm_srli_epi64_1(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64_1
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, -1);
}
1386
// _mm_srli_epi64 with a run-time count B: expects a call to
// llvm.x86.sse2.psrli.q with an i32 count operand.
__m128i test_mm_srli_epi64_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srli_epi64_2
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, B);
}
1392
// _mm_srli_si128 by 5 bytes: expects a <16 x i8> shufflevector of the input
// with a zero vector, using indices 5 through 20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
1398
// _mm_srli_si128 by 17 bytes: the function is expected to return a constant
// zeroinitializer <2 x i64> directly. The expectation below carries the
// directive prefix so FileCheck actually enforces it, mirroring
// test_mm_slli_si128_2.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_srli_si128(A, 17);
}
1404
// _mm_store_pd: expects a 16-byte-aligned store of a <2 x double>.
void test_mm_store_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
1410
// _mm_store_pd1: expects a shufflevector with an all-zero mask followed by
// a 16-byte-aligned <2 x double> store.
void test_mm_store_pd1(double *x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}
1417
// _mm_store_sd: expects lane 0 to be extracted and written with an align-1
// scalar double store.
void test_mm_store_sd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
1424
// _mm_store_si128: expects a 16-byte-aligned store of a <2 x i64>.
void test_mm_store_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
1430
// _mm_store1_pd: expects a shufflevector with an all-zero mask followed by
// a 16-byte-aligned <2 x double> store.
void test_mm_store1_pd(double *x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}
1437
// _mm_storeh_pd: expects lane 1 to be extracted and written with an align-1
// scalar double store.
void test_mm_storeh_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
1444
// _mm_storel_epi64: expects lane 0 of the <2 x i64> to be extracted and
// written through an i64* with an align-1 store. Note the test's parameters
// are (vector, pointer) while the intrinsic takes (pointer, vector).
void test_mm_storel_epi64(__m128i x, void *y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}
1451
// _mm_storel_pd: expects lane 0 to be extracted and written with an align-1
// scalar double store.
void test_mm_storel_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
1458
// _mm_storer_pd: expects a shufflevector with mask <1, 0> followed by a
// 16-byte-aligned <2 x double> store. Note the test's parameters are
// (vector, pointer) while the intrinsic takes (pointer, vector).
void test_mm_storer_pd(__m128d A, double *B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}
1465
// _mm_storeu_pd: expects an align-1 <2 x double> store that is immediately
// followed by the function's return.
void test_mm_storeu_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}
1472
// _mm_storeu_si128: expects an align-1 <2 x i64> store that is immediately
// followed by the function's return.
void test_mm_storeu_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}
1479
// Exercises _mm_storeu_si64. The FileCheck patterns below expect element 0 of
// the <2 x i64> to be extracted, that same value stored as an i64 with
// alignment 1, and the return to follow the store directly.
void test_mm_storeu_si64(void *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si64
  // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store i64 [[EXT]], i64* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si64(A, B);
}
1487
// Exercises _mm_storeu_si32. The FileCheck patterns below expect element 0 of
// the <4 x i32> view to be extracted, that same value stored as an i32 with
// alignment 1, and the return to follow the store directly.
void test_mm_storeu_si32(void *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si32
  // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0
  // CHECK: store i32 [[EXT]], i32* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si32(A, B);
}
1495
// Exercises _mm_storeu_si16. The FileCheck patterns below expect element 0 of
// the <8 x i16> view to be extracted, that same value stored as an i16 with
// alignment 1, and the return to follow the store directly.
void test_mm_storeu_si16(void *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si16
  // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0
  // CHECK: store i16 [[EXT]], i16* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si16(A, B);
}
1503
// Exercises _mm_stream_pd. The FileCheck pattern below expects a <2 x double>
// store with align 16 carrying !nontemporal metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
1509
// Exercises _mm_stream_si32. The FileCheck pattern below expects an i32 store
// with alignment 1 carrying !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
1515
#ifdef __x86_64__
// Exercises _mm_stream_si64; only compiled when __x86_64__ is defined. The
// FileCheck pattern below expects an i64 store with alignment 1 carrying
// !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
#endif
1523
// Exercises _mm_stream_si128. The FileCheck pattern below expects a <2 x i64>
// store with align 16 carrying !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
1529
// Exercises _mm_sub_epi8. The FileCheck pattern below expects a plain
// `sub <16 x i8>` instruction.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
1535
// Exercises _mm_sub_epi16. The FileCheck pattern below expects a plain
// `sub <8 x i16>` instruction.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
1541
// Exercises _mm_sub_epi32. The FileCheck pattern below expects a plain
// `sub <4 x i32>` instruction.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
1547
// Exercises _mm_sub_epi64. The FileCheck pattern below expects a plain
// `sub <2 x i64>` instruction.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
1553
// Exercises _mm_sub_pd. The FileCheck pattern below expects a vector
// `fsub <2 x double>` instruction.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
1559
// Exercises _mm_sub_sd. The FileCheck patterns below expect element 0 to be
// extracted from two <2 x double> values, a scalar fsub, and the result to be
// inserted back at element 0.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}
1568
// Exercises _mm_subs_epi8. The FileCheck pattern below expects a call to the
// generic @llvm.ssub.sat.v16i8 intrinsic.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}
1574
// Exercises _mm_subs_epi16. The FileCheck pattern below expects a call to the
// generic @llvm.ssub.sat.v8i16 intrinsic.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}
1580
// Exercises _mm_subs_epu8. The FileCheck patterns below require that the
// target-specific @llvm.x86.sse2.psubus.b call is absent and that the generic
// @llvm.usub.sat.v16i8 intrinsic is called instead.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}
1587
// Exercises _mm_subs_epu16. The FileCheck patterns below require that the
// target-specific @llvm.x86.sse2.psubus.w call is absent and that the generic
// @llvm.usub.sat.v8i16 intrinsic is called instead.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}
1594
// Exercises _mm_ucomieq_sd. The FileCheck pattern below expects a call to
// @llvm.x86.sse2.ucomieq.sd returning i32.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}
1600
// Exercises _mm_ucomige_sd. The FileCheck pattern below expects a call to
// @llvm.x86.sse2.ucomige.sd returning i32.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1606
// Exercises _mm_ucomigt_sd. The FileCheck pattern below expects a call to
// @llvm.x86.sse2.ucomigt.sd returning i32.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1612
// Exercises _mm_ucomile_sd. The FileCheck pattern below expects a call to
// @llvm.x86.sse2.ucomile.sd returning i32.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1618
// Exercises _mm_ucomilt_sd. The FileCheck pattern below expects a call to
// @llvm.x86.sse2.ucomilt.sd returning i32.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1624
// Exercises _mm_ucomineq_sd. The FileCheck pattern below expects a call to
// @llvm.x86.sse2.ucomineq.sd returning i32.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1630
// Exercises _mm_undefined_pd. The FileCheck pattern below expects the function
// to return a zeroinitializer <2 x double>. Declared with an explicit (void)
// parameter list so the definition is a proper C prototype.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: test_mm_undefined_pd
  // CHECK: ret <2 x double> zeroinitializer
  return _mm_undefined_pd();
}
1636
// Exercises _mm_undefined_si128. The FileCheck pattern below expects the
// function to return a zeroinitializer <2 x i64>. Declared with an explicit
// (void) parameter list so the definition is a proper C prototype.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: test_mm_undefined_si128
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_undefined_si128();
}
1642
// Exercises _mm_unpackhi_epi8. The FileCheck pattern below expects a
// <16 x i8> shufflevector whose mask alternates indices 8..15 of the first
// operand with indices 24..31 (the second operand's upper half).
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1648
// Exercises _mm_unpackhi_epi16. The FileCheck pattern below expects an
// <8 x i16> shufflevector whose mask alternates indices 4..7 of the first
// operand with indices 12..15 (the second operand's upper half).
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1654
// Exercises _mm_unpackhi_epi32. The FileCheck pattern below expects a
// <4 x i32> shufflevector with mask <2, 6, 3, 7>.
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1660
// Exercises _mm_unpackhi_epi64. The FileCheck pattern below expects a
// <2 x i64> shufflevector with mask <1, 3>.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1666
// Exercises _mm_unpackhi_pd. The FileCheck pattern below expects a
// <2 x double> shufflevector with mask <1, 3>.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1672
// Exercises _mm_unpacklo_epi8. The FileCheck pattern below expects a
// <16 x i8> shufflevector whose mask alternates indices 0..7 of the first
// operand with indices 16..23 (the second operand's lower half).
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1678
// Exercises _mm_unpacklo_epi16. The FileCheck pattern below expects an
// <8 x i16> shufflevector whose mask alternates indices 0..3 of the first
// operand with indices 8..11 (the second operand's lower half).
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1684
// Exercises _mm_unpacklo_epi32. The FileCheck pattern below expects a
// <4 x i32> shufflevector with mask <0, 4, 1, 5>.
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1690
// Exercises _mm_unpacklo_epi64. The FileCheck pattern below expects a
// <2 x i64> shufflevector with mask <0, 2>.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1696
// Exercises _mm_unpacklo_pd. The FileCheck pattern below expects a
// <2 x double> shufflevector with mask <0, 2>.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1702
// Exercises _mm_xor_pd. The FileCheck pattern below expects the operation to
// appear as an integer `xor <2 x i64>` in the emitted IR.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1708
// Exercises _mm_xor_si128. The FileCheck pattern below expects an
// `xor <2 x i64>` instruction.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1714