1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
2 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
3 // RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
4
5
6 #include <immintrin.h>
7
8 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
9
// Codegen test for _mm_add_epi8: the emitted IR must contain a generic
// `add` on <16 x i8>.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}
15
// Codegen test for _mm_add_epi16: the emitted IR must contain a generic
// `add` on <8 x i16>.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}
21
// Codegen test for _mm_add_epi32: the emitted IR must contain a generic
// `add` on <4 x i32>.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}
27
// Codegen test for _mm_add_epi64: the emitted IR must contain a generic
// `add` on <2 x i64>.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}
33
// Codegen test for _mm_add_pd: the emitted IR must contain an `fadd` on
// <2 x double>.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}
39
// Codegen test for _mm_add_sd: lane 0 of both operands is extracted, added
// as a scalar double, and inserted back into lane 0.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fadd double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_add_sd(A, B);
}
48
// Codegen test for _mm_adds_epi8: must lower to the generic
// llvm.sadd.sat.v16i8 intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epi8(A, B);
}
54
// Codegen test for _mm_adds_epi16: must lower to the generic
// llvm.sadd.sat.v8i16 intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epi16(A, B);
}
60
// Codegen test for _mm_adds_epu8: must lower to the generic
// llvm.uadd.sat.v16i8 intrinsic, and must not call the x86-specific
// llvm.x86.sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epu8(A, B);
}
67
// Codegen test for _mm_adds_epu16: must lower to the generic
// llvm.uadd.sat.v8i16 intrinsic, and must not call the x86-specific
// llvm.x86.sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epu16(A, B);
}
74
// Codegen test for _mm_and_pd: the bitwise AND is expected on <2 x i64>.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <2 x i64>
  return _mm_and_pd(A, B);
}
80
// Codegen test for _mm_and_si128: the emitted IR must contain an `and` on
// <2 x i64>.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}
86
// Codegen test for _mm_andnot_pd: an `xor` with all-ones followed by an
// `and`, both on <2 x i64>.
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_andnot_pd
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_pd(A, B);
}
93
// Codegen test for _mm_andnot_si128: an `xor` with all-ones followed by an
// `and`, both on <2 x i64>.
__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_andnot_si128
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_si128(A, B);
}
100
// Codegen test for _mm_avg_epu8: must call the llvm.x86.sse2.pavg.b
// intrinsic.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_avg_epu8(A, B);
}
106
// Codegen test for _mm_avg_epu16: must call the llvm.x86.sse2.pavg.w
// intrinsic.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_avg_epu16(A, B);
}
112
// Codegen test for _mm_bslli_si128 with a shift of 5: expected as a
// shufflevector of a zero vector and the input, selecting indices 11..26.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}
118
// Codegen test for _mm_bsrli_si128 with a shift of 5: expected as a
// shufflevector of the input and a zero vector, selecting indices 5..20.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}
124
// Codegen test for _mm_castpd_ps: a single bitcast from <2 x double> to
// <4 x float>.
__m128 test_mm_castpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_ps
  // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
  return _mm_castpd_ps(A);
}
130
// Codegen test for _mm_castpd_si128: a single bitcast from <2 x double> to
// <2 x i64>.
__m128i test_mm_castpd_si128(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_si128
  // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
  return _mm_castpd_si128(A);
}
136
// Codegen test for _mm_castps_pd: a single bitcast from <4 x float> to
// <2 x double>.
__m128d test_mm_castps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_castps_pd
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
  return _mm_castps_pd(A);
}
142
// Codegen test for _mm_castps_si128: a single bitcast from <4 x float> to
// <2 x i64>.
__m128i test_mm_castps_si128(__m128 A) {
  // CHECK-LABEL: test_mm_castps_si128
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  return _mm_castps_si128(A);
}
148
// Codegen test for _mm_castsi128_pd: a single bitcast from <2 x i64> to
// <2 x double>.
__m128d test_mm_castsi128_pd(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_pd
  // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
  return _mm_castsi128_pd(A);
}
154
// Codegen test for _mm_castsi128_ps: a single bitcast from <2 x i64> to
// <4 x float>.
__m128 test_mm_castsi128_ps(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_ps
  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
  return _mm_castsi128_ps(A);
}
160
// Codegen test for _mm_clflush: must call llvm.x86.sse2.clflush with an
// i8* argument.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}
166
// Codegen test for _mm_cmpeq_epi8: the emitted IR must contain
// `icmp eq` on <16 x i8>.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}
172
// Codegen test for _mm_cmpeq_epi16: the emitted IR must contain
// `icmp eq` on <8 x i16>.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}
178
// Codegen test for _mm_cmpeq_epi32: the emitted IR must contain
// `icmp eq` on <4 x i32>.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}
184
// Codegen test for _mm_cmpeq_pd: `fcmp oeq`, then on consecutive lines a
// sext to <2 x i64>, a bitcast to <2 x double>, and the return.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpeq_pd(A, B);
}
193
// Codegen test for _mm_cmpeq_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}
199
// Codegen test for _mm_cmpge_pd: expected as `fcmp ole` (the same predicate
// test_mm_cmple_pd expects), then sext, bitcast and return on consecutive
// lines.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpge_pd(A, B);
}
208
// Codegen test for _mm_cmpge_sd: a call to llvm.x86.sse2.cmp.sd with
// immediate 2, followed by extract/insert pairs that rebuild the result
// vector lane 0 and then lane 1.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpge_sd(A, B);
}
218
// Codegen test for _mm_cmpgt_epi8: the emitted IR must contain
// `icmp sgt` on <16 x i8>.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}
224
// Codegen test for _mm_cmpgt_epi16: the emitted IR must contain
// `icmp sgt` on <8 x i16>.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}
230
// Codegen test for _mm_cmpgt_epi32: the emitted IR must contain
// `icmp sgt` on <4 x i32>.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}
236
// Codegen test for _mm_cmpgt_pd: expected as `fcmp olt` (the same predicate
// test_mm_cmplt_pd expects), then sext, bitcast and return on consecutive
// lines.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpgt_pd(A, B);
}
245
// Codegen test for _mm_cmpgt_sd: a call to llvm.x86.sse2.cmp.sd with
// immediate 1, followed by extract/insert pairs that rebuild the result
// vector lane 0 and then lane 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpgt_sd(A, B);
}
255
// Codegen test for _mm_cmple_pd: `fcmp ole`, then sext, bitcast and return
// on consecutive lines.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmple_pd(A, B);
}
264
// Codegen test for _mm_cmple_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}
270
// Codegen test for _mm_cmplt_epi8: expected as `icmp sgt` on <16 x i8>
// (the same predicate test_mm_cmpgt_epi8 expects).
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}
276
// Codegen test for _mm_cmplt_epi16: expected as `icmp sgt` on <8 x i16>
// (the same predicate test_mm_cmpgt_epi16 expects).
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}
282
// Codegen test for _mm_cmplt_epi32: expected as `icmp sgt` on <4 x i32>
// (the same predicate test_mm_cmpgt_epi32 expects).
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}
288
// Codegen test for _mm_cmplt_pd: `fcmp olt`, then sext, bitcast and return
// on consecutive lines.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmplt_pd(A, B);
}
297
// Codegen test for _mm_cmplt_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}
303
// Codegen test for _mm_cmpneq_pd: `fcmp une`, then sext, bitcast and return
// on consecutive lines.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK:         [[CMP:%.*]] = fcmp une <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpneq_pd(A, B);
}
312
// Codegen test for _mm_cmpneq_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}
318
// Codegen test for _mm_cmpnge_pd: expected as `fcmp ugt` (the same
// predicate test_mm_cmpnle_pd expects), then sext, bitcast and return on
// consecutive lines.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnge_pd(A, B);
}
327
// Codegen test for _mm_cmpnge_sd: a call to llvm.x86.sse2.cmp.sd with
// immediate 6, followed by extract/insert pairs that rebuild the result
// vector lane 0 and then lane 1.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpnge_sd(A, B);
}
337
// Codegen test for _mm_cmpngt_pd: expected as `fcmp uge` (the same
// predicate test_mm_cmpnlt_pd expects), then sext, bitcast and return on
// consecutive lines.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpngt_pd(A, B);
}
346
// Codegen test for _mm_cmpngt_sd: a call to llvm.x86.sse2.cmp.sd with
// immediate 5, followed by extract/insert pairs that rebuild the result
// vector lane 0 and then lane 1.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpngt_sd(A, B);
}
356
// Codegen test for _mm_cmpnle_pd: `fcmp ugt`, then sext, bitcast and return
// on consecutive lines.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnle_pd(A, B);
}
365
// Codegen test for _mm_cmpnle_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}
371
// Codegen test for _mm_cmpnlt_pd: `fcmp uge`, then sext, bitcast and return
// on consecutive lines.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnlt_pd(A, B);
}
380
// Codegen test for _mm_cmpnlt_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}
386
// Codegen test for _mm_cmpord_pd: `fcmp ord`, then sext, bitcast and return
// on consecutive lines.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK:         [[CMP:%.*]] = fcmp ord <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpord_pd(A, B);
}
395
// Codegen test for _mm_cmpord_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}
401
// Codegen test for _mm_cmpunord_pd: `fcmp uno`, then sext, bitcast and
// return on consecutive lines.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK:         [[CMP:%.*]] = fcmp uno <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpunord_pd(A, B);
}
410
// Codegen test for _mm_cmpunord_sd: must call llvm.x86.sse2.cmp.sd with
// compare immediate 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}
416
// Codegen test for _mm_comieq_sd: must call llvm.x86.sse2.comieq.sd and
// return its i32 result.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comieq_sd(A, B);
}
422
// Codegen test for _mm_comige_sd: must call llvm.x86.sse2.comige.sd and
// return its i32 result.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comige_sd(A, B);
}
428
// Codegen test for _mm_comigt_sd: must call llvm.x86.sse2.comigt.sd and
// return its i32 result.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comigt_sd(A, B);
}
434
// Codegen test for _mm_comile_sd: must call llvm.x86.sse2.comile.sd and
// return its i32 result.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comile_sd(A, B);
}
440
// Codegen test for _mm_comilt_sd: must call llvm.x86.sse2.comilt.sd and
// return its i32 result.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comilt_sd(A, B);
}
446
// Codegen test for _mm_comineq_sd: must call llvm.x86.sse2.comineq.sd and
// return its i32 result.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comineq_sd(A, B);
}
452
// Codegen test for _mm_cvtepi32_pd: a shufflevector picks elements 0 and 1
// of the <4 x i32> input, then `sitofp` converts them to <2 x double>.
__m128d test_mm_cvtepi32_pd(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_pd
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
  return _mm_cvtepi32_pd(A);
}
459
// Codegen test for _mm_cvtepi32_ps: a `sitofp` from <4 x i32> to
// <4 x float>.
__m128 test_mm_cvtepi32_ps(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ps
  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
  return _mm_cvtepi32_ps(A);
}
465
// Codegen test for _mm_cvtpd_epi32: must call llvm.x86.sse2.cvtpd2dq.
__m128i test_mm_cvtpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
  return _mm_cvtpd_epi32(A);
}
471
// Codegen test for _mm_cvtpd_ps: must call llvm.x86.sse2.cvtpd2ps.
__m128 test_mm_cvtpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
  return _mm_cvtpd_ps(A);
}
477
// Codegen test for _mm_cvtps_epi32: must call llvm.x86.sse2.cvtps2dq.
__m128i test_mm_cvtps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
  return _mm_cvtps_epi32(A);
}
483
// Codegen test for _mm_cvtps_pd: a shufflevector picks elements 0 and 1 of
// the <4 x float> input, then `fpext` widens them to <2 x double>.
__m128d test_mm_cvtps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_pd
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
  return _mm_cvtps_pd(A);
}
490
// Codegen test for _mm_cvtsd_f64: an extractelement of lane 0 from
// <2 x double>.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}
496
// Codegen test for _mm_cvtsd_si32: must call llvm.x86.sse2.cvtsd2si.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
  return _mm_cvtsd_si32(A);
}
502
#ifdef __x86_64__
// Codegen test for _mm_cvtsd_si64 (only built when __x86_64__ is defined):
// must call llvm.x86.sse2.cvtsd2si64.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
  return _mm_cvtsd_si64(A);
}
#endif
510
// Codegen test for _mm_cvtsd_ss: must call llvm.x86.sse2.cvtsd2ss with the
// <4 x float> and <2 x double> operands.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
  return _mm_cvtsd_ss(A, B);
}
516
// Codegen test for _mm_cvtsi128_si32: an extractelement of lane 0 from
// <4 x i32>.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}
522
#ifdef __x86_64__
// Codegen test for _mm_cvtsi128_si64 (only built when __x86_64__ is
// defined): an extractelement of lane 0 from <2 x i64>.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}
#endif
530
// Codegen test for _mm_cvtsi32_sd: `sitofp` from i32 to double, then an
// insertelement into lane 0 of the <2 x double> result.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}
537
// Codegen test for _mm_cvtsi32_si128: the scalar is inserted into lane 0 of
// an undef <4 x i32>, and lanes 1..3 are filled with constant 0.
__m128i test_mm_cvtsi32_si128(int A) {
  // CHECK-LABEL: test_mm_cvtsi32_si128
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
  return _mm_cvtsi32_si128(A);
}
546
#ifdef __x86_64__
// Codegen test for _mm_cvtsi64_sd (only built when __x86_64__ is defined):
// `sitofp` from i64 to double, then an insertelement into lane 0.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_sd
  // CHECK: sitofp i64 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi64_sd(A, B);
}

// Codegen test for _mm_cvtsi64_si128 (same guard): the scalar is inserted
// into lane 0 of an undef <2 x i64>, and lane 1 is filled with constant 0.
__m128i test_mm_cvtsi64_si128(long long A) {
  // CHECK-LABEL: test_mm_cvtsi64_si128
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_cvtsi64_si128(A);
}
#endif
562
// Codegen test for _mm_cvtss_sd: lane 0 of the <4 x float> operand is
// extracted, widened with `fpext`, and inserted into lane 0 of the
// <2 x double> result.
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
  // CHECK-LABEL: test_mm_cvtss_sd
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fpext float %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtss_sd(A, B);
}
570
// Codegen test for _mm_cvttpd_epi32: must call llvm.x86.sse2.cvttpd2dq.
__m128i test_mm_cvttpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
  return _mm_cvttpd_epi32(A);
}
576
// Codegen test for _mm_cvttps_epi32: must call llvm.x86.sse2.cvttps2dq.
__m128i test_mm_cvttps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
  return _mm_cvttps_epi32(A);
}
582
// Codegen test for _mm_cvttsd_si32: must call llvm.x86.sse2.cvttsd2si.
int test_mm_cvttsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
  return _mm_cvttsd_si32(A);
}
588
#ifdef __x86_64__
// Codegen test for _mm_cvttsd_si64 (only built when __x86_64__ is defined):
// must call llvm.x86.sse2.cvttsd2si64.
long long test_mm_cvttsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
  return _mm_cvttsd_si64(A);
}
#endif
596
// Codegen test for _mm_div_pd: the emitted IR must contain an `fdiv` on
// <2 x double>.
__m128d test_mm_div_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_pd
  // CHECK: fdiv <2 x double>
  return _mm_div_pd(A, B);
}
602
// Codegen test for _mm_div_sd: lane 0 of both operands is extracted,
// divided as a scalar double, and inserted back into lane 0.
__m128d test_mm_div_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fdiv double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_div_sd(A, B);
}
611
// Lowering to pextrw requires optimization.
// Without it, _mm_extract_epi16 with index 1 is expected as an
// extractelement from <8 x i16> (index typed i32 or i64) followed by a
// zext from i16 to i32.
int test_mm_extract_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_extract_epi16
  // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
  // CHECK: zext i16 %{{.*}} to i32
  return _mm_extract_epi16(A, 1);
}
619
// Codegen test for _mm_insert_epi16 with index 0: expected as an
// insertelement into <8 x i16> (index typed i32 or i64).
__m128i test_mm_insert_epi16(__m128i A, int B) {
  // CHECK-LABEL: test_mm_insert_epi16
  // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
  return _mm_insert_epi16(A, B, 0);
}
625
// Codegen test for _mm_lfence: must call llvm.x86.sse2.lfence.
// Declared with an explicit (void) parameter list so the definition is a
// proper prototype.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}
631
// Codegen test for _mm_load_pd: a <2 x double> load with 16-byte alignment.
__m128d test_mm_load_pd(double const* A) {
  // CHECK-LABEL: test_mm_load_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  return _mm_load_pd(A);
}
637
// Codegen test for _mm_load_pd1: a scalar double load (align 8) whose value
// is inserted into lane 0 and then lane 1 of the result.
__m128d test_mm_load_pd1(double const* A) {
  // CHECK-LABEL: test_mm_load_pd1
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load_pd1(A);
}
645
// Codegen test for _mm_load_sd: a scalar double load with alignment
// exactly 1 (the `{{$}}` anchors the end of the line).
__m128d test_mm_load_sd(double const* A) {
  // CHECK-LABEL: test_mm_load_sd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  return _mm_load_sd(A);
}
651
// Codegen test for _mm_load_si128: a <2 x i64> load with 16-byte alignment.
__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
  return _mm_load_si128(A);
}
657
// Codegen test for _mm_load1_pd: a scalar double load (align 8) whose value
// is inserted into lane 0 and then lane 1 of the result.
__m128d test_mm_load1_pd(double const* A) {
  // CHECK-LABEL: test_mm_load1_pd
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load1_pd(A);
}
665
// Codegen test for _mm_loadh_pd: a scalar double load with alignment
// exactly 1, inserted into lane 1 of the vector.
__m128d test_mm_loadh_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadh_pd(x, y);
}
672
// Codegen test for _mm_loadl_epi64: an i64 load with alignment exactly 1,
// inserted into lane 0 of an undef <2 x i64>, with lane 1 set to 0.
// The function-name match uses the -LABEL form, like every other test in
// this file, so a failure here cannot leak into neighbouring functions.
__m128i test_mm_loadl_epi64(__m128i* y) {
  // CHECK-LABEL: test_mm_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
  // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
  return _mm_loadl_epi64(y);
}
680
// Codegen test for _mm_loadl_pd: a scalar double load with alignment
// exactly 1 goes into lane 0 of a fresh vector; lane 1 is then extracted
// from a vector and inserted into lane 1 of the result.
__m128d test_mm_loadl_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadl_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadl_pd(x, y);
}
689
// Codegen test for _mm_loadr_pd: a 16-byte-aligned <2 x double> load
// followed by a shufflevector that swaps the two lanes.
__m128d test_mm_loadr_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
  return _mm_loadr_pd(A);
}
696
// Codegen test for _mm_loadu_pd: a <2 x double> load with alignment
// exactly 1.
__m128d test_mm_loadu_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadu_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
  return _mm_loadu_pd(A);
}
702
// Codegen test for _mm_loadu_si128: a <2 x i64> load with alignment
// exactly 1.
__m128i test_mm_loadu_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_loadu_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
  return _mm_loadu_si128(A);
}
708
// Codegen test for _mm_loadu_si64: an i64 load with alignment exactly 1,
// inserted into lane 0 of an undef <2 x i64>, with lane 1 set to 0.
__m128i test_mm_loadu_si64(void const* A) {
  // CHECK-LABEL: test_mm_loadu_si64
  // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_loadu_si64(A);
}
716
// Codegen test for _mm_loadu_si32: an i32 load with alignment exactly 1,
// inserted into lane 0 of an undef <4 x i32>, with lanes 1..3 set to 0.
__m128i test_mm_loadu_si32(void const* A) {
  // CHECK-LABEL: test_mm_loadu_si32
  // CHECK: load i32, i32* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
  return _mm_loadu_si32(A);
}
726
// Codegen test for _mm_loadu_si16: an i16 load with alignment exactly 1,
// inserted into lane 0 of an undef <8 x i16>, with lanes 1..7 set to 0.
__m128i test_mm_loadu_si16(void const* A) {
  // CHECK-LABEL: test_mm_loadu_si16
  // CHECK: load i16, i16* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
  return _mm_loadu_si16(A);
}
740
// Codegen test for _mm_madd_epi16: must call llvm.x86.sse2.pmadd.wd, taking
// two <8 x i16> operands and producing <4 x i32>.
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_madd_epi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_madd_epi16(A, B);
}
746
// Codegen test for _mm_maskmoveu_si128: must call
// llvm.x86.sse2.maskmov.dqu with two <16 x i8> operands and an i8*
// destination.
void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
  // CHECK-LABEL: test_mm_maskmoveu_si128
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
  _mm_maskmoveu_si128(A, B, C);
}
752
// Codegen test for _mm_max_epi16: `icmp sgt` on <8 x i16>, immediately
// followed by a select between the same two operands.
__m128i test_mm_max_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epi16
  // CHECK:       [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_max_epi16(A, B);
}
759
// Codegen test for _mm_max_epu8: `icmp ugt` on <16 x i8>, immediately
// followed by a select between the same two operands.
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epu8
  // CHECK:       [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_max_epu8(A, B);
}
766
// Exercises _mm_max_pd. The expected IR is a call to llvm.x86.sse2.max.pd on
// two <2 x double> operands.
__m128d test_mm_max_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_pd(A, B);
}
772
// Exercises _mm_max_sd. The expected IR is a call to llvm.x86.sse2.max.sd on
// two <2 x double> operands.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}
778
// Exercises _mm_mfence. The expected IR is a void call to
// llvm.x86.sse2.mfence with no operands. The parameter list is spelled
// (void) so the definition is a proper prototype.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}
784
// Exercises _mm_min_epi16. The expected IR is a signed less-than compare on
// <8 x i16> immediately followed by a select that reuses the compare result
// and the same two operands.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_min_epi16(A, B);
}
791
// Exercises _mm_min_epu8. The expected IR is an unsigned less-than compare on
// <16 x i8> immediately followed by a select that reuses the compare result
// and the same two operands.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epu8(A, B);
}
798
// Exercises _mm_min_pd. The expected IR is a call to llvm.x86.sse2.min.pd on
// two <2 x double> operands.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}
804
// Exercises _mm_min_sd. The expected IR is a call to llvm.x86.sse2.min.sd on
// two <2 x double> operands.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
810
test_mm_movepi64_pi64(__m128i A)811 __m64 test_mm_movepi64_pi64(__m128i A)
812 {
813 // CHECK-LABEL: test_mm_movepi64_pi64
814 // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %1, i32 0
815 // CHECK: bitcast i64 [[EXT]] to <1 x i64>
816 return _mm_movepi64_pi64(A);
817 }
818
// Exercises _mm_movpi64_epi64. The expected IR bitcasts the <1 x i64> input
// to i64, inserts it into element 0 of an undef <2 x i64>, and then sets
// element 1 to the constant 0.
__m128i test_mm_movpi64_epi64(__m64 A)
{
  // CHECK-LABEL: test_mm_movpi64_epi64
  // CHECK: [[CAST:%.*]] = bitcast <1 x i64> %{{.*}} to i64
  // CHECK: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[CAST]], i32 0
  // CHECK: insertelement <2 x i64> [[INS]], i64 0, i32 1
  return _mm_movpi64_epi64(A);
}
827
// Exercises _mm_move_epi64. The expected IR is a shufflevector of two
// <2 x i64> operands with the index mask <0, 2>.
__m128i test_mm_move_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_move_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_move_epi64(A);
}
833
// Exercises _mm_move_sd. The expected IR extracts element 0 from one
// <2 x double> and inserts a double into element 0 of a <2 x double>.
__m128d test_mm_move_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_move_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_move_sd(A, B);
}
840
// Exercises _mm_movemask_epi8. The expected IR is a call to
// llvm.x86.sse2.pmovmskb.128 on a <16 x i8> operand, returning i32.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
846
// Exercises _mm_movemask_pd. The expected IR is a call to
// llvm.x86.sse2.movmsk.pd on a <2 x double> operand, returning i32.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
852
// Exercises _mm_mul_epu32. The expected IR is generic rather than a target
// intrinsic: two <2 x i64> 'and' operations against 4294967295 (0xFFFFFFFF)
// in each lane, followed by a <2 x i64> mul.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_epu32(A, B);
}
860
// Exercises _mm_mul_pd. The expected IR is a plain fmul on <2 x double>.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
866
// Exercises _mm_mul_sd. The expected IR extracts element 0 from two
// <2 x double> values, multiplies the scalars with fmul double, and inserts
// a double into element 0 of a <2 x double>.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fmul double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_mul_sd(A, B);
}
875
// Exercises _mm_mulhi_epi16. The expected IR is a call to
// llvm.x86.sse2.pmulh.w on two <8 x i16> operands.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
881
// Exercises _mm_mulhi_epu16. The expected IR is a call to
// llvm.x86.sse2.pmulhu.w on two <8 x i16> operands.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
887
// Exercises _mm_mullo_epi16. The expected IR is a plain mul on <8 x i16>.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
893
// Exercises _mm_or_pd. Although the C operands are __m128d, the expected IR
// is an integer 'or' on <2 x i64>.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
899
// Exercises _mm_or_si128. The expected IR is an 'or' on <2 x i64>.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
905
// Exercises _mm_packs_epi16. The expected IR is a call to
// llvm.x86.sse2.packsswb.128 taking two <8 x i16> operands and yielding
// <16 x i8>.
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
911
// Exercises _mm_packs_epi32. The expected IR is a call to
// llvm.x86.sse2.packssdw.128 taking two <4 x i32> operands and yielding
// <8 x i16>.
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
917
// Exercises _mm_packus_epi16. The expected IR is a call to
// llvm.x86.sse2.packuswb.128 taking two <8 x i16> operands and yielding
// <16 x i8>.
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
923
// Exercises _mm_pause. The expected IR is a void call to
// llvm.x86.sse2.pause with no operands. The intrinsic is invoked as a plain
// statement: a void function must not return an expression in C.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}
929
// Exercises _mm_sad_epu8. The expected IR is a call to
// llvm.x86.sse2.psad.bw taking two <16 x i8> operands and yielding <2 x i64>.
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
935
// Exercises _mm_set_epi8 with sixteen char arguments. The expected IR is a
// chain of sixteen insertelement instructions that starts from an undef
// <16 x i8> and fills element indices 0 through 15 in ascending order.
__m128i test_mm_set_epi8(char A, char B, char C, char D,
                         char E, char F, char G, char H,
                         char I, char J, char K, char L,
                         char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_set_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
959
// Exercises _mm_set_epi16 with eight short arguments. The expected IR is a
// chain of eight insertelement instructions that starts from an undef
// <8 x i16> and fills element indices 0 through 7 in ascending order.
__m128i test_mm_set_epi16(short A, short B, short C, short D,
                          short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_set_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set_epi16(A, B, C, D, E, F, G, H);
}
973
// Exercises _mm_set_epi32 with four int arguments. The expected IR is a chain
// of four insertelement instructions that starts from an undef <4 x i32> and
// fills element indices 0 through 3 in ascending order.
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_set_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set_epi32(A, B, C, D);
}
982
// Exercises _mm_set_epi64 with two __m64 arguments. The expected IR inserts
// an i64 into element 0 of an undef <2 x i64> and then another into element 1.
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_set_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64(A, B);
}
989
// Exercises _mm_set_epi64x with two long long arguments. The expected IR
// inserts an i64 into element 0 of an undef <2 x i64> and then another into
// element 1.
__m128i test_mm_set_epi64x(long long A, long long B) {
  // CHECK-LABEL: test_mm_set_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64x(A, B);
}
996
// Exercises _mm_set_pd with two double arguments. The expected IR inserts a
// double into element 0 of an undef <2 x double> and then another into
// element 1.
__m128d test_mm_set_pd(double A, double B) {
  // CHECK-LABEL: test_mm_set_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd(A, B);
}
1003
// Exercises _mm_set_pd1 with a single double argument. The expected IR
// inserts a double into element 0 of an undef <2 x double> and then into
// element 1.
__m128d test_mm_set_pd1(double A) {
  // CHECK-LABEL: test_mm_set_pd1
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd1(A);
}
1010
// Exercises _mm_set_sd. The expected IR inserts a double into element 0 of an
// undef <2 x double> and sets element 1 to the constant 0.0.
__m128d test_mm_set_sd(double A) {
  // CHECK-LABEL: test_mm_set_sd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
  return _mm_set_sd(A);
}
1017
// Exercises _mm_set1_epi8 with a single char argument. The expected IR is a
// chain of sixteen insertelement instructions that starts from an undef
// <16 x i8> and fills element indices 0 through 15 in ascending order.
__m128i test_mm_set1_epi8(char A) {
  // CHECK-LABEL: test_mm_set1_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set1_epi8(A);
}
1038
// Exercises _mm_set1_epi16 with a single short argument. The expected IR is a
// chain of eight insertelement instructions that starts from an undef
// <8 x i16> and fills element indices 0 through 7 in ascending order.
__m128i test_mm_set1_epi16(short A) {
  // CHECK-LABEL: test_mm_set1_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set1_epi16(A);
}
1051
// Exercises _mm_set1_epi32 with a single int argument. The expected IR is a
// chain of four insertelement instructions that starts from an undef
// <4 x i32> and fills element indices 0 through 3 in ascending order.
__m128i test_mm_set1_epi32(int A) {
  // CHECK-LABEL: test_mm_set1_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set1_epi32(A);
}
1060
// Exercises _mm_set1_epi64 with a single __m64 argument. The expected IR
// inserts an i64 into element 0 of an undef <2 x i64> and then into element 1.
__m128i test_mm_set1_epi64(__m64 A) {
  // CHECK-LABEL: test_mm_set1_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64(A);
}
1067
// Exercises _mm_set1_epi64x with a single long long argument. The expected IR
// inserts an i64 into element 0 of an undef <2 x i64> and then into element 1.
__m128i test_mm_set1_epi64x(long long A) {
  // CHECK-LABEL: test_mm_set1_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64x(A);
}
1074
// Exercises _mm_set1_pd with a single double argument. The expected IR
// inserts a double into element 0 of an undef <2 x double> and then into
// element 1.
__m128d test_mm_set1_pd(double A) {
  // CHECK-LABEL: test_mm_set1_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set1_pd(A);
}
1081
// Exercises _mm_setr_epi8 with sixteen char arguments. The expected IR is a
// chain of sixteen insertelement instructions that starts from an undef
// <16 x i8> and fills element indices 0 through 15 in ascending order.
__m128i test_mm_setr_epi8(char A, char B, char C, char D,
                          char E, char F, char G, char H,
                          char I, char J, char K, char L,
                          char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_setr_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
1105
// Exercises _mm_setr_epi16 with eight short arguments. The expected IR is a
// chain of eight insertelement instructions that starts from an undef
// <8 x i16> and fills element indices 0 through 7 in ascending order.
__m128i test_mm_setr_epi16(short A, short B, short C, short D,
                           short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_setr_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_setr_epi16(A, B, C, D, E, F, G, H);
}
1119
// Exercises _mm_setr_epi32 with four int arguments. The expected IR is a
// chain of four insertelement instructions that starts from an undef
// <4 x i32> and fills element indices 0 through 3 in ascending order.
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_setr_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_setr_epi32(A, B, C, D);
}
1128
// Exercises _mm_setr_epi64 with two __m64 arguments. The expected IR inserts
// an i64 into element 0 of an undef <2 x i64> and then another into element 1.
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_setr_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_setr_epi64(A, B);
}
1135
// Exercises _mm_setr_pd with two double arguments. The expected IR inserts a
// double into element 0 of an undef <2 x double> and then another into
// element 1.
__m128d test_mm_setr_pd(double A, double B) {
  // CHECK-LABEL: test_mm_setr_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_setr_pd(A, B);
}
1142
test_mm_setzero_pd()1143 __m128d test_mm_setzero_pd() {
1144 // CHECK-LABEL: test_mm_setzero_pd
1145 // CHECK: store <2 x double> zeroinitializer
1146 return _mm_setzero_pd();
1147 }
1148
test_mm_setzero_si128()1149 __m128i test_mm_setzero_si128() {
1150 // CHECK-LABEL: test_mm_setzero_si128
1151 // CHECK: store <2 x i64> zeroinitializer
1152 return _mm_setzero_si128();
1153 }
1154
// Exercises _mm_shuffle_epi32 with immediate 0. The expected IR is a
// shufflevector of a <4 x i32> against undef with an all-zero index mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
1160
// Exercises _mm_shuffle_pd with immediate 1. The expected IR is a
// shufflevector of two <2 x double> operands with the index mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
1166
// Exercises _mm_shufflehi_epi16 with immediate 0. The expected IR is a
// shufflevector of an <8 x i16> against undef whose mask keeps indices 0-3
// and uses index 4 for each of the upper four elements.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
1172
// Exercises _mm_shufflelo_epi16 with immediate 0. The expected IR is a
// shufflevector of an <8 x i16> against undef whose mask uses index 0 for
// each of the lower four elements and keeps indices 4-7.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
1178
// Exercises _mm_sll_epi16 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psll.w on two <8 x i16> operands.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}
1184
// Exercises _mm_sll_epi32 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psll.d on two <4 x i32> operands.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}
1190
// Exercises _mm_sll_epi64 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psll.q on two <2 x i64> operands.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}
1196
// Exercises _mm_slli_epi16 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.pslli.w with an <8 x i16> operand and an i32 count.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}
1202
// Exercises _mm_slli_epi16 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.pslli.w with an <8 x i16> operand and an i32
// count.
__m128i test_mm_slli_epi16_1(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16_1
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, -1);
}
1208
// Exercises _mm_slli_epi16 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.pslli.w with an <8 x i16> operand and an i32 count.
__m128i test_mm_slli_epi16_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_slli_epi16_2
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, B);
}
1214
// Exercises _mm_slli_epi32 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.pslli.d with a <4 x i32> operand and an i32 count.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}
1220
// Exercises _mm_slli_epi32 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.pslli.d with a <4 x i32> operand and an i32
// count.
__m128i test_mm_slli_epi32_1(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32_1
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, -1);
}
1226
// Exercises _mm_slli_epi32 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.pslli.d with a <4 x i32> operand and an i32 count.
__m128i test_mm_slli_epi32_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_slli_epi32_2
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, B);
}
1232
// Exercises _mm_slli_epi64 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.pslli.q with a <2 x i64> operand and an i32 count.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}
1238
// Exercises _mm_slli_epi64 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.pslli.q with a <2 x i64> operand and an i32
// count.
__m128i test_mm_slli_epi64_1(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64_1
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, -1);
}
1244
// Exercises _mm_slli_epi64 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.pslli.q with a <2 x i64> operand and an i32 count.
__m128i test_mm_slli_epi64_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_slli_epi64_2
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, B);
}
1250
// Exercises _mm_slli_si128 with a byte count of 5. The expected IR is a
// shufflevector whose first operand is a zero <16 x i8> and whose mask holds
// the consecutive indices 11 through 26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
1256
// Exercises _mm_slli_si128 with a byte count of 17. The expected IR returns a
// zeroinitializer <2 x i64> directly.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_slli_si128(A, 17);
}
1262
// Exercises _mm_sqrt_pd. The expected IR is a call to the generic
// llvm.sqrt.v2f64 intrinsic on a <2 x double> operand.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
1268
// Exercises _mm_sqrt_sd. The expected IR extracts element 0 of a
// <2 x double>, calls the generic llvm.sqrt.f64 intrinsic on a double, and
// inserts a double into element 0. Note that the element indices here are
// typed i64, unlike the i32 indices expected by most other tests in this file.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.sqrt.f64(double {{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_sqrt_sd(A, B);
}
1276
// Exercises _mm_sra_epi16 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psra.w on two <8 x i16> operands.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}
1282
// Exercises _mm_sra_epi32 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psra.d on two <4 x i32> operands.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}
1288
// Exercises _mm_srai_epi16 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.psrai.w with an <8 x i16> operand and an i32 count.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}
1294
// Exercises _mm_srai_epi16 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.psrai.w with an <8 x i16> operand and an i32
// count.
__m128i test_mm_srai_epi16_1(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16_1
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, -1);
}
1300
// Exercises _mm_srai_epi16 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.psrai.w with an <8 x i16> operand and an i32 count.
__m128i test_mm_srai_epi16_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srai_epi16_2
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, B);
}
1306
// Exercises _mm_srai_epi32 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.psrai.d with a <4 x i32> operand and an i32 count.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}
1312
// Exercises _mm_srai_epi32 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.psrai.d with a <4 x i32> operand and an i32
// count.
__m128i test_mm_srai_epi32_1(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32_1
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, -1);
}
1318
// Exercises _mm_srai_epi32 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.psrai.d with a <4 x i32> operand and an i32 count.
__m128i test_mm_srai_epi32_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srai_epi32_2
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, B);
}
1324
// Exercises _mm_srl_epi16 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psrl.w on two <8 x i16> operands.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}
1330
// Exercises _mm_srl_epi32 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psrl.d on two <4 x i32> operands.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}
1336
// Exercises _mm_srl_epi64 with a vector shift count. The expected IR is a
// call to llvm.x86.sse2.psrl.q on two <2 x i64> operands.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}
1342
// Exercises _mm_srli_epi16 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.psrli.w with an <8 x i16> operand and an i32 count.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}
1348
// Exercises _mm_srli_epi16 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.psrli.w with an <8 x i16> operand and an i32
// count.
__m128i test_mm_srli_epi16_1(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16_1
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, -1);
}
1354
// Exercises _mm_srli_epi16 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.psrli.w with an <8 x i16> operand and an i32 count. The
// label directive spells out this function's full name, _2 suffix included,
// so it anchors on this function only.
__m128i test_mm_srli_epi16_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srli_epi16_2
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, B);
}
1360
// Exercises _mm_srli_epi32 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.psrli.d with a <4 x i32> operand and an i32 count.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}
1366
// Exercises _mm_srli_epi32 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.psrli.d with a <4 x i32> operand and an i32
// count.
__m128i test_mm_srli_epi32_1(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32_1
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, -1);
}
1372
// Exercises _mm_srli_epi32 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.psrli.d with a <4 x i32> operand and an i32 count.
__m128i test_mm_srli_epi32_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srli_epi32_2
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, B);
}
1378
// Exercises _mm_srli_epi64 with the constant count 1. The expected IR is a
// call to llvm.x86.sse2.psrli.q with a <2 x i64> operand and an i32 count.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}
1384
// Exercises _mm_srli_epi64 with the constant count -1. The expected IR is
// still a call to llvm.x86.sse2.psrli.q with a <2 x i64> operand and an i32
// count.
__m128i test_mm_srli_epi64_1(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64_1
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, -1);
}
1390
// Exercises _mm_srli_epi64 with a run-time count B. The expected IR is a call
// to llvm.x86.sse2.psrli.q with a <2 x i64> operand and an i32 count.
__m128i test_mm_srli_epi64_2(__m128i A, int B) {
  // CHECK-LABEL: test_mm_srli_epi64_2
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, B);
}
1396
// Exercises _mm_srli_si128 with a byte count of 5. The expected IR is a
// shufflevector whose second operand is a zero <16 x i8> and whose mask holds
// the consecutive indices 5 through 20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
1402
// Exercises _mm_srli_si128 with a byte count of 17. The expected IR returns a
// zeroinitializer <2 x i64> directly, mirroring the expectation of the
// left-shift counterpart test_mm_slli_si128_2.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_srli_si128(A, 17);
}
1408
// Exercises _mm_store_pd. The expected IR is a <2 x double> store with
// alignment 16.
void test_mm_store_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
1414
// Exercises _mm_store_pd1. The expected IR is a shufflevector of <2 x double>
// operands with an all-zero index mask, followed by a <2 x double> store with
// alignment 16.
void test_mm_store_pd1(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}
1421
// Exercises _mm_store_sd. The expected IR extracts element 0 of a
// <2 x double> and stores a double with alignment 1; the trailing
// end-of-line anchor requires that nothing follows "align 1" on that line.
void test_mm_store_sd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
1428
// Exercises _mm_store_si128. The expected IR is a <2 x i64> store with
// alignment 16.
void test_mm_store_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
1434
// Exercises _mm_store1_pd. The expected IR is a shufflevector of <2 x double>
// operands with an all-zero index mask, followed by a <2 x double> store with
// alignment 16.
void test_mm_store1_pd(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}
1441
// Exercises _mm_storeh_pd. The expected IR extracts element 1 of a
// <2 x double> and stores a double with alignment 1.
void test_mm_storeh_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
1448
// Exercises _mm_storel_epi64. The test takes the vector first and the
// destination second, and passes them to the intrinsic in the opposite order
// (y, x). The expected IR extracts element 0 of a <2 x i64> and performs a
// store through an i64* with alignment 1.
void test_mm_storel_epi64(__m128i x, void* y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}
1455
// Exercises _mm_storel_pd. The expected IR extracts element 0 of a
// <2 x double> and stores a double with alignment 1.
void test_mm_storel_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
1462
// Exercises _mm_storer_pd. The test takes the vector first and the
// destination second, and passes them to the intrinsic as (B, A). The
// expected IR is a shufflevector of <2 x double> operands with the index mask
// <1, 0>, followed by a store through a <2 x double>* with alignment 16.
void test_mm_storer_pd(__m128d A, double* B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}
1469
// Exercises _mm_storeu_pd. The expected IR is a store through a
// <2 x double>* with alignment 1, with "ret void" on the very next line.
void test_mm_storeu_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}
1476
// Calls _mm_storeu_si128(A, B). The expected IR is a <2 x i64> store with
// alignment 1, immediately followed by "ret void".
void test_mm_storeu_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}
1483
// Calls _mm_storeu_si64(A, B). The expected IR extracts element 0 of the
// <2 x i64> vector (captured as EXT), stores exactly that value as an i64
// with alignment 1, and returns on the very next IR line.
void test_mm_storeu_si64(void* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si64
  // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store i64 [[EXT]], i64* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si64(A, B);
}
1491
// Calls _mm_storeu_si32(A, B). The expected IR extracts element 0 of the
// vector viewed as <4 x i32> (captured as EXT), stores exactly that value as
// an i32 with alignment 1, and returns on the very next IR line.
void test_mm_storeu_si32(void* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si32
  // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0
  // CHECK: store i32 [[EXT]], i32* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si32(A, B);
}
1499
// Calls _mm_storeu_si16(A, B). The expected IR extracts element 0 of the
// vector viewed as <8 x i16> (captured as EXT), stores exactly that value as
// an i16 with alignment 1, and returns on the very next IR line.
void test_mm_storeu_si16(void* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si16
  // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0
  // CHECK: store i16 [[EXT]], i16* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si16(A, B);
}
1507
// Calls _mm_stream_pd(A, B). The expected IR is a 16-byte aligned
// <2 x double> store carrying !nontemporal metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
1513
// Calls _mm_stream_si32(A, B). The expected IR is an i32 store with
// alignment 1 carrying !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
1519
1520 #ifdef __x86_64__
// Calls _mm_stream_si64(A, B); compiled only when __x86_64__ is defined (see
// the enclosing preprocessor guard). The expected IR is an i64 store with
// alignment 1 carrying !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
1526 #endif
1527
// Calls _mm_stream_si128(A, B). The expected IR is a 16-byte aligned
// <2 x i64> store carrying !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
1533
// Returns _mm_sub_epi8(A, B). The expected IR contains a plain
// "sub <16 x i8>" instruction.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
1539
// Returns _mm_sub_epi16(A, B). The expected IR contains a plain
// "sub <8 x i16>" instruction.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
1545
// Returns _mm_sub_epi32(A, B). The expected IR contains a plain
// "sub <4 x i32>" instruction.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
1551
// Returns _mm_sub_epi64(A, B). The expected IR contains a plain
// "sub <2 x i64>" instruction.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
1557
// Returns _mm_sub_pd(A, B). The expected IR contains a vector
// "fsub <2 x double>" instruction.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
1563
// Returns _mm_sub_sd(A, B). The expected IR is a scalar sequence: two
// extractions of element 0 from <2 x double> vectors, a scalar "fsub double",
// and an insertelement that places the result back into element 0.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}
1572
// Returns _mm_subs_epi8(A, B). The expected IR is a call to the generic
// signed saturating-subtract intrinsic @llvm.ssub.sat.v16i8.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}
1578
// Returns _mm_subs_epi16(A, B). The expected IR is a call to the generic
// signed saturating-subtract intrinsic @llvm.ssub.sat.v8i16.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}
1584
// Returns _mm_subs_epu8(A, B). The expected IR is a call to the generic
// unsigned saturating-subtract intrinsic @llvm.usub.sat.v16i8; the negative
// pattern additionally rejects a call to the x86-specific
// @llvm.x86.sse2.psubus.b intrinsic before it.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}
1591
// Returns _mm_subs_epu16(A, B). The expected IR is a call to the generic
// unsigned saturating-subtract intrinsic @llvm.usub.sat.v8i16; the negative
// pattern additionally rejects a call to the x86-specific
// @llvm.x86.sse2.psubus.w intrinsic before it.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}
1598
// Returns _mm_ucomieq_sd(A, B). The expected IR is a call to the
// @llvm.x86.sse2.ucomieq.sd intrinsic, which yields an i32.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}
1604
// Returns _mm_ucomige_sd(A, B). The expected IR is a call to the
// @llvm.x86.sse2.ucomige.sd intrinsic, which yields an i32.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1610
// Returns _mm_ucomigt_sd(A, B). The expected IR is a call to the
// @llvm.x86.sse2.ucomigt.sd intrinsic, which yields an i32.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1616
// Returns _mm_ucomile_sd(A, B). The expected IR is a call to the
// @llvm.x86.sse2.ucomile.sd intrinsic, which yields an i32.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1622
// Returns _mm_ucomilt_sd(A, B). The expected IR is a call to the
// @llvm.x86.sse2.ucomilt.sd intrinsic, which yields an i32.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1628
// Returns _mm_ucomineq_sd(A, B). The expected IR is a call to the
// @llvm.x86.sse2.ucomineq.sd intrinsic, which yields an i32.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1634
// Returns the result of _mm_undefined_pd(). The FileCheck patterns below
// require the emitted function to return an all-zero <2 x double>.
// The parameter list is spelled (void) so that the definition provides a
// prototype; an empty () in C declares a function without one.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> zeroinitializer
  return _mm_undefined_pd();
}
1640
// Returns the result of _mm_undefined_si128(). The FileCheck patterns below
// require the emitted function to return an all-zero <2 x i64>.
// The parameter list is spelled (void) so that the definition provides a
// prototype; an empty () in C declares a function without one.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_undefined_si128();
}
1646
// Returns _mm_unpackhi_epi8(A, B). The expected IR is a <16 x i8>
// shufflevector whose mask interleaves elements 8-15 of the first shuffle
// operand with elements 8-15 of the second (mask indices 24-31).
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1652
// Returns _mm_unpackhi_epi16(A, B). The expected IR is an <8 x i16>
// shufflevector whose mask interleaves elements 4-7 of the first shuffle
// operand with elements 4-7 of the second (mask indices 12-15).
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1658
// Returns _mm_unpackhi_epi32(A, B). The expected IR is a <4 x i32>
// shufflevector whose mask interleaves elements 2-3 of the first shuffle
// operand with elements 2-3 of the second (mask indices 6-7).
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1664
// Returns _mm_unpackhi_epi64(A, B). The expected IR is a <2 x i64>
// shufflevector with mask <1, 3>, i.e. element 1 of each shuffle operand.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1670
// Returns _mm_unpackhi_pd(A, B). The expected IR is a <2 x double>
// shufflevector with mask <1, 3>, i.e. element 1 of each shuffle operand.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1676
// Returns _mm_unpacklo_epi8(A, B). The expected IR is a <16 x i8>
// shufflevector whose mask interleaves elements 0-7 of the first shuffle
// operand with elements 0-7 of the second (mask indices 16-23).
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1682
// Returns _mm_unpacklo_epi16(A, B). The expected IR is an <8 x i16>
// shufflevector whose mask interleaves elements 0-3 of the first shuffle
// operand with elements 0-3 of the second (mask indices 8-11).
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1688
// Returns _mm_unpacklo_epi32(A, B). The expected IR is a <4 x i32>
// shufflevector whose mask interleaves elements 0-1 of the first shuffle
// operand with elements 0-1 of the second (mask indices 4-5).
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1694
// Returns _mm_unpacklo_epi64(A, B). The expected IR is a <2 x i64>
// shufflevector with mask <0, 2>, i.e. element 0 of each shuffle operand.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1700
// Returns _mm_unpacklo_pd(A, B). The expected IR is a <2 x double>
// shufflevector with mask <0, 2>, i.e. element 0 of each shuffle operand.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1706
// Returns _mm_xor_pd(A, B). Although the operands are double vectors, the
// expected IR performs the xor on <2 x i64> values.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1712
// Returns _mm_xor_si128(A, B). The expected IR contains an
// "xor <2 x i64>" instruction.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1718