// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
3 
4 
5 #include <immintrin.h>
6 
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
8 
// Expects _mm_add_epi8 to be emitted as a plain IR add on <16 x i8>.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}
14 
// Expects _mm_add_epi16 to be emitted as a plain IR add on <8 x i16>.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}
20 
// Expects _mm_add_epi32 to be emitted as a plain IR add on <4 x i32>.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}
26 
// Expects _mm_add_epi64 to be emitted as a plain IR add on <2 x i64>.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}
32 
// Expects _mm_add_pd to be emitted as an IR fadd on <2 x double>.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}
38 
// Expects _mm_add_sd to extract element 0 of both operands, fadd the scalars,
// and insert the sum back into element 0.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fadd double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_add_sd(A, B);
}
47 
// Expects _mm_adds_epi8 to be emitted as a call to the sse2.padds.b intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epi8(A, B);
}
53 
// Expects _mm_adds_epi16 to be emitted as a call to the sse2.padds.w intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epi16(A, B);
}
59 
// Expects _mm_adds_epu8 to be emitted as a call to the sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epu8(A, B);
}
65 
// Expects _mm_adds_epu16 to be emitted as a call to the sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epu16(A, B);
}
71 
// Expects _mm_and_pd to be emitted as an integer and on <2 x i64>.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <2 x i64>
  return _mm_and_pd(A, B);
}
77 
// Expects _mm_and_si128 to be emitted as an and on <2 x i64>.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}
83 
// Expects _mm_andnot_pd to be emitted as an xor with all-ones followed by an
// and, both on <2 x i64>.
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_andnot_pd
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_pd(A, B);
}
90 
// Expects _mm_andnot_si128 to be emitted as an xor with all-ones followed by
// an and, both on <2 x i64>.
__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_andnot_si128
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_si128(A, B);
}
97 
// Expects _mm_avg_epu8 to expand to generic IR (zext to i16, add, add 1,
// lshr 1, trunc to i8) with no pavg.b intrinsic call ahead of that sequence.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
  // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
  return _mm_avg_epu8(A, B);
}
109 
// Expects _mm_avg_epu16 to expand to generic IR (zext to i32, add, add 1,
// lshr 1, trunc to i16) with no pavg.w intrinsic call ahead of that sequence.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
  // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
  return _mm_avg_epu16(A, B);
}
121 
// Expects _mm_bslli_si128 with a count of 5 to be emitted as a byte
// shufflevector that takes its leading elements from a zero vector.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}
127 
// Expects _mm_bsrli_si128 with a count of 5 to be emitted as a byte
// shufflevector that takes its trailing elements from a zero vector.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}
133 
// Expects _mm_castpd_ps to be a bitcast from <2 x double> to <4 x float>.
__m128 test_mm_castpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_ps
  // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
  return _mm_castpd_ps(A);
}
139 
// Expects _mm_castpd_si128 to be a bitcast from <2 x double> to <2 x i64>.
__m128i test_mm_castpd_si128(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_si128
  // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
  return _mm_castpd_si128(A);
}
145 
// Expects _mm_castps_pd to be a bitcast from <4 x float> to <2 x double>.
__m128d test_mm_castps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_castps_pd
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
  return _mm_castps_pd(A);
}
151 
// Expects _mm_castps_si128 to be a bitcast from <4 x float> to <2 x i64>.
__m128i test_mm_castps_si128(__m128 A) {
  // CHECK-LABEL: test_mm_castps_si128
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  return _mm_castps_si128(A);
}
157 
// Expects _mm_castsi128_pd to be a bitcast from <2 x i64> to <2 x double>.
__m128d test_mm_castsi128_pd(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_pd
  // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
  return _mm_castsi128_pd(A);
}
163 
// Expects _mm_castsi128_ps to be a bitcast from <2 x i64> to <4 x float>.
__m128 test_mm_castsi128_ps(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_ps
  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
  return _mm_castsi128_ps(A);
}
169 
// Expects _mm_clflush to be emitted as a call to the sse2.clflush intrinsic
// taking an i8 pointer.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}
175 
// Expects _mm_cmpeq_epi8 to be emitted as icmp eq on <16 x i8>.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}
181 
// Expects _mm_cmpeq_epi16 to be emitted as icmp eq on <8 x i16>.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}
187 
// Expects _mm_cmpeq_epi32 to be emitted as icmp eq on <4 x i32>.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}
193 
// Expects _mm_cmpeq_pd to be emitted as fcmp oeq, sign-extended to <2 x i64>,
// bitcast back to <2 x double> and returned, on consecutive IR lines.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpeq_pd(A, B);
}
202 
// Expects _mm_cmpeq_sd to be emitted as a cmp.sd call with predicate
// immediate 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}
208 
// Expects _mm_cmpge_pd to be emitted as fcmp ole (presumably with the
// operands exchanged; the pattern does not pin operand order), then
// sext, bitcast and ret on consecutive IR lines.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpge_pd(A, B);
}
217 
// Expects _mm_cmpge_sd to be emitted as a cmp.sd call with predicate
// immediate 2, followed by extract/insert pairs that assemble the result
// from an element 0 and an element 1.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpge_sd(A, B);
}
227 
// Expects _mm_cmpgt_epi8 to be emitted as a signed icmp sgt on <16 x i8>.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}
233 
// Expects _mm_cmpgt_epi16 to be emitted as a signed icmp sgt on <8 x i16>.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}
239 
// Expects _mm_cmpgt_epi32 to be emitted as a signed icmp sgt on <4 x i32>.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}
245 
// Expects _mm_cmpgt_pd to be emitted as fcmp olt (presumably with the
// operands exchanged; the pattern does not pin operand order), then
// sext, bitcast and ret on consecutive IR lines.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpgt_pd(A, B);
}
254 
// Expects _mm_cmpgt_sd to be emitted as a cmp.sd call with predicate
// immediate 1, followed by extract/insert pairs that assemble the result
// from an element 0 and an element 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpgt_sd(A, B);
}
264 
// Expects _mm_cmple_pd to be emitted as fcmp ole, then sext, bitcast and ret
// on consecutive IR lines.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmple_pd(A, B);
}
273 
// Expects _mm_cmple_sd to be emitted as a cmp.sd call with predicate
// immediate 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}
279 
// Expects _mm_cmplt_epi8 to be emitted as icmp sgt on <16 x i8> (presumably
// with the operands exchanged; the pattern does not pin operand order).
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}
285 
// Expects _mm_cmplt_epi16 to be emitted as icmp sgt on <8 x i16> (presumably
// with the operands exchanged; the pattern does not pin operand order).
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}
291 
// Expects _mm_cmplt_epi32 to be emitted as icmp sgt on <4 x i32> (presumably
// with the operands exchanged; the pattern does not pin operand order).
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}
297 
// Expects _mm_cmplt_pd to be emitted as fcmp olt, then sext, bitcast and ret
// on consecutive IR lines.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmplt_pd(A, B);
}
306 
// Expects _mm_cmplt_sd to be emitted as a cmp.sd call with predicate
// immediate 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}
312 
// Expects _mm_cmpneq_pd to be emitted as fcmp une, then sext, bitcast and ret
// on consecutive IR lines.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK:         [[CMP:%.*]] = fcmp une <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpneq_pd(A, B);
}
321 
// Expects _mm_cmpneq_sd to be emitted as a cmp.sd call with predicate
// immediate 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}
327 
// Expects _mm_cmpnge_pd to be emitted as fcmp ugt (presumably with the
// operands exchanged; the pattern does not pin operand order), then
// sext, bitcast and ret on consecutive IR lines.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnge_pd(A, B);
}
336 
// Expects _mm_cmpnge_sd to be emitted as a cmp.sd call with predicate
// immediate 6, followed by extract/insert pairs that assemble the result
// from an element 0 and an element 1.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpnge_sd(A, B);
}
346 
// Expects _mm_cmpngt_pd to be emitted as fcmp uge (presumably with the
// operands exchanged; the pattern does not pin operand order), then
// sext, bitcast and ret on consecutive IR lines.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpngt_pd(A, B);
}
355 
// Expects _mm_cmpngt_sd to be emitted as a cmp.sd call with predicate
// immediate 5, followed by extract/insert pairs that assemble the result
// from an element 0 and an element 1.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpngt_sd(A, B);
}
365 
// Expects _mm_cmpnle_pd to be emitted as fcmp ugt, then sext, bitcast and ret
// on consecutive IR lines.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnle_pd(A, B);
}
374 
// Expects _mm_cmpnle_sd to be emitted as a cmp.sd call with predicate
// immediate 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}
380 
// Expects _mm_cmpnlt_pd to be emitted as fcmp uge, then sext, bitcast and ret
// on consecutive IR lines.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnlt_pd(A, B);
}
389 
// Expects _mm_cmpnlt_sd to be emitted as a cmp.sd call with predicate
// immediate 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}
395 
// Expects _mm_cmpord_pd to be emitted as fcmp ord, then sext, bitcast and ret
// on consecutive IR lines.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK:         [[CMP:%.*]] = fcmp ord <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpord_pd(A, B);
}
404 
// Expects _mm_cmpord_sd to be emitted as a cmp.sd call with predicate
// immediate 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}
410 
// Expects _mm_cmpunord_pd to be emitted as fcmp uno, then sext, bitcast and
// ret on consecutive IR lines.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK:         [[CMP:%.*]] = fcmp uno <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpunord_pd(A, B);
}
419 
// Expects _mm_cmpunord_sd to be emitted as a cmp.sd call with predicate
// immediate 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}
425 
// Expects _mm_comieq_sd to be emitted as a call to the sse2.comieq.sd
// intrinsic returning i32.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comieq_sd(A, B);
}
431 
// Expects _mm_comige_sd to be emitted as a call to the sse2.comige.sd
// intrinsic returning i32.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comige_sd(A, B);
}
437 
// Expects _mm_comigt_sd to be emitted as a call to the sse2.comigt.sd
// intrinsic returning i32.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comigt_sd(A, B);
}
443 
// Expects _mm_comile_sd to be emitted as a call to the sse2.comile.sd
// intrinsic returning i32.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comile_sd(A, B);
}
449 
// Expects _mm_comilt_sd to be emitted as a call to the sse2.comilt.sd
// intrinsic returning i32.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comilt_sd(A, B);
}
455 
// Expects _mm_comineq_sd to be emitted as a call to the sse2.comineq.sd
// intrinsic returning i32.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comineq_sd(A, B);
}
461 
// Expects _mm_cvtepi32_pd to select elements 0 and 1 with a shufflevector and
// convert them with sitofp to <2 x double>.
__m128d test_mm_cvtepi32_pd(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_pd
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
  return _mm_cvtepi32_pd(A);
}
468 
// Expects _mm_cvtepi32_ps to be emitted as sitofp from <4 x i32> to
// <4 x float>.
__m128 test_mm_cvtepi32_ps(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ps
  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
  return _mm_cvtepi32_ps(A);
}
474 
// Expects _mm_cvtpd_epi32 to be emitted as a call to the sse2.cvtpd2dq
// intrinsic.
__m128i test_mm_cvtpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
  return _mm_cvtpd_epi32(A);
}
480 
// Expects _mm_cvtpd_ps to be emitted as a call to the sse2.cvtpd2ps
// intrinsic.
__m128 test_mm_cvtpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
  return _mm_cvtpd_ps(A);
}
486 
// Expects _mm_cvtps_epi32 to be emitted as a call to the sse2.cvtps2dq
// intrinsic.
__m128i test_mm_cvtps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
  return _mm_cvtps_epi32(A);
}
492 
// Expects _mm_cvtps_pd to select elements 0 and 1 with a shufflevector and
// widen them with fpext to <2 x double>.
__m128d test_mm_cvtps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_pd
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
  return _mm_cvtps_pd(A);
}
499 
// Expects _mm_cvtsd_f64 to be emitted as an extractelement of element 0.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}
505 
// Expects _mm_cvtsd_si32 to be emitted as a call to the sse2.cvtsd2si
// intrinsic returning i32.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
  return _mm_cvtsd_si32(A);
}
511 
// Expects _mm_cvtsd_si64 to be emitted as a call to the sse2.cvtsd2si64
// intrinsic returning i64.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
  return _mm_cvtsd_si64(A);
}
517 
// Expects _mm_cvtsd_ss to be emitted as a call to the sse2.cvtsd2ss
// intrinsic taking a <4 x float> and a <2 x double>.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
  return _mm_cvtsd_ss(A, B);
}
523 
// Expects _mm_cvtsi128_si32 to be emitted as an extractelement of element 0
// from <4 x i32>.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}
529 
// Expects _mm_cvtsi128_si64 to be emitted as an extractelement of element 0
// from <2 x i64>.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}
535 
// Expects _mm_cvtsi32_sd to convert the i32 with sitofp and insert the
// result into element 0.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}
542 
// Expects _mm_cvtsi32_si128 to build a <4 x i32> by inserting the argument
// at element 0 and constant zeros at elements 1 through 3.
__m128i test_mm_cvtsi32_si128(int A) {
  // CHECK-LABEL: test_mm_cvtsi32_si128
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
  return _mm_cvtsi32_si128(A);
}
551 
// Expects _mm_cvtsi64_sd to convert the i64 with sitofp and insert the
// result into element 0.
__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_sd
  // CHECK: sitofp i64 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi64_sd(A, B);
}
558 
// Expects _mm_cvtsi64_si128 to build a <2 x i64> by inserting the argument
// at element 0 and a constant zero at element 1.
__m128i test_mm_cvtsi64_si128(long long A) {
  // CHECK-LABEL: test_mm_cvtsi64_si128
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_cvtsi64_si128(A);
}
565 
// Expects _mm_cvtss_sd to extract float element 0, widen it with fpext, and
// insert the double into element 0.
__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
  // CHECK-LABEL: test_mm_cvtss_sd
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fpext float %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtss_sd(A, B);
}
573 
// Expects _mm_cvttpd_epi32 to be emitted as a call to the sse2.cvttpd2dq
// intrinsic.
__m128i test_mm_cvttpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
  return _mm_cvttpd_epi32(A);
}
579 
// Expects _mm_cvttps_epi32 to be emitted as a call to the sse2.cvttps2dq
// intrinsic.
__m128i test_mm_cvttps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
  return _mm_cvttps_epi32(A);
}
585 
// Expects _mm_cvttsd_si32 to be emitted as a call to the sse2.cvttsd2si
// intrinsic returning i32.
int test_mm_cvttsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
  return _mm_cvttsd_si32(A);
}
591 
// Expects _mm_cvttsd_si64 to be emitted as a call to the sse2.cvttsd2si64
// intrinsic returning i64.
long long test_mm_cvttsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvttsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
  return _mm_cvttsd_si64(A);
}
597 
// Expects _mm_div_pd to be emitted as an IR fdiv on <2 x double>.
__m128d test_mm_div_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_pd
  // CHECK: fdiv <2 x double>
  return _mm_div_pd(A, B);
}
603 
// Expects _mm_div_sd to extract element 0 of both operands, fdiv the
// scalars, and insert the quotient back into element 0.
__m128d test_mm_div_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_div_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fdiv double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_div_sd(A, B);
}
612 
// Lowering to pextrw requires optimization.
// Expects _mm_extract_epi16 with index 1 to be emitted as an extractelement
// from <8 x i16> followed by a zext of the i16 to i32.
int test_mm_extract_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_extract_epi16
  // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
  // CHECK: zext i16 %{{.*}} to i32
  return _mm_extract_epi16(A, 1);
}
620 
// Expects _mm_insert_epi16 with index 0 to be emitted as an insertelement
// into <8 x i16>.
__m128i test_mm_insert_epi16(__m128i A, int B) {
  // CHECK-LABEL: test_mm_insert_epi16
  // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
  return _mm_insert_epi16(A, B, 0);
}
626 
// Expects _mm_lfence to be emitted as a call to the sse2.lfence intrinsic.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}
632 
// Expects _mm_load_pd to be emitted as a <2 x double> load with align 16.
__m128d test_mm_load_pd(double const* A) {
  // CHECK-LABEL: test_mm_load_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  return _mm_load_pd(A);
}
638 
// Expects _mm_load_pd1 to load one double with align 8 and insert it into
// elements 0 and 1.
__m128d test_mm_load_pd1(double const* A) {
  // CHECK-LABEL: test_mm_load_pd1
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load_pd1(A);
}
646 
// Expects _mm_load_sd to load one double with exactly align 1.
__m128d test_mm_load_sd(double const* A) {
  // CHECK-LABEL: test_mm_load_sd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  return _mm_load_sd(A);
}
652 
// Expects _mm_load_si128 to be emitted as a <2 x i64> load with align 16.
__m128i test_mm_load_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
  return _mm_load_si128(A);
}
658 
// Expects _mm_load1_pd to load one double with align 8 and insert it into
// elements 0 and 1.
__m128d test_mm_load1_pd(double const* A) {
  // CHECK-LABEL: test_mm_load1_pd
  // CHECK: load double, double* %{{.*}}, align 8
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_load1_pd(A);
}
666 
// Expects _mm_loadh_pd to load one double with exactly align 1 and insert it
// into element 1.
__m128d test_mm_loadh_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadh_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadh_pd(x, y);
}
673 
// Expects _mm_loadl_epi64 to load one i64 with exactly align 1, insert it at
// element 0, and insert a constant zero at element 1.
__m128i test_mm_loadl_epi64(__m128i* y) {
  // CHECK-LABEL: test_mm_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
  // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
  return _mm_loadl_epi64(y);
}
681 
// Expects _mm_loadl_pd to load one double with exactly align 1, insert it at
// element 0, and fill element 1 from an extracted element 1.
__m128d test_mm_loadl_pd(__m128d x, void* y) {
  // CHECK-LABEL: test_mm_loadl_pd
  // CHECK: load double, double* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_loadl_pd(x, y);
}
690 
// Expects _mm_loadr_pd to be emitted as a <2 x double> load with align 16
// followed by a shufflevector that swaps the two elements.
__m128d test_mm_loadr_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
  return _mm_loadr_pd(A);
}
697 
// Expects _mm_loadu_pd to be emitted as a <2 x double> load with exactly
// align 1.
__m128d test_mm_loadu_pd(double const* A) {
  // CHECK-LABEL: test_mm_loadu_pd
  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
  return _mm_loadu_pd(A);
}
703 
// Expects _mm_loadu_si128 to be emitted as a <2 x i64> load with exactly
// align 1.
__m128i test_mm_loadu_si128(__m128i const* A) {
  // CHECK-LABEL: test_mm_loadu_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
  return _mm_loadu_si128(A);
}
709 
// Expects _mm_loadu_si64 to load one i64 with exactly align 1, insert it at
// element 0, and insert a constant zero at element 1.
__m128i test_mm_loadu_si64(void const* A) {
  // CHECK-LABEL: test_mm_loadu_si64
  // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_loadu_si64(A);
}
717 
// Expects _mm_madd_epi16 to be emitted as a call to the sse2.pmadd.wd
// intrinsic.
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_madd_epi16
  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_madd_epi16(A, B);
}
723 
// Expects _mm_maskmoveu_si128 to be emitted as a call to the sse2.maskmov.dqu
// intrinsic taking two <16 x i8> vectors and an i8 pointer.
void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
  // CHECK-LABEL: test_mm_maskmoveu_si128
  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
  _mm_maskmoveu_si128(A, B, C);
}
729 
// Expects _mm_max_epi16 to be emitted as icmp sgt on <8 x i16> immediately
// followed by a select between the same two operands.
__m128i test_mm_max_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epi16
  // CHECK:       [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_max_epi16(A, B);
}
736 
// _mm_max_epu8: the expected IR is an unsigned-greater-than compare on
// <16 x i8> immediately followed by a select between the same two operands.
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_max_epu8
  // CHECK:       [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_max_epu8(A, B);
}
743 
// _mm_max_pd: the expected IR is a call to the llvm.x86.sse2.max.pd
// intrinsic on two <2 x double> operands.
__m128d test_mm_max_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_pd(A, B);
}
749 
// _mm_max_sd: the expected IR is a call to the llvm.x86.sse2.max.sd
// intrinsic on two <2 x double> operands.
__m128d test_mm_max_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_max_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_max_sd(A, B);
}
755 
// _mm_mfence: the expected IR is a call to the llvm.x86.sse2.mfence
// intrinsic. Declared with (void) so the definition is a proper prototype.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}
761 
// _mm_min_epi16: the expected IR is a signed-less-than compare on <8 x i16>
// immediately followed by a select between the same two operands.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK:       [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_min_epi16(A, B);
}
768 
// _mm_min_epu8: the expected IR is an unsigned-less-than compare on
// <16 x i8> immediately followed by a select between the same two operands.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK:       [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epu8(A, B);
}
775 
// _mm_min_pd: the expected IR is a call to the llvm.x86.sse2.min.pd
// intrinsic on two <2 x double> operands.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}
781 
// _mm_min_sd: the expected IR is a call to the llvm.x86.sse2.min.sd
// intrinsic on two <2 x double> operands.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
787 
// _mm_move_epi64: the expected IR is a <2 x i64> shufflevector with mask
// <0, 2>, i.e. lane 0 of the first operand and lane 0 of the second.
__m128i test_mm_move_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_move_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_move_epi64(A);
}
793 
// _mm_move_sd: the expected IR extracts lane 0 of one <2 x double> and
// inserts it into lane 0 of another.
__m128d test_mm_move_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_move_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_move_sd(A, B);
}
800 
// _mm_movemask_epi8: the expected IR is a call to the
// llvm.x86.sse2.pmovmskb.128 intrinsic returning i32.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
806 
// _mm_movemask_pd: the expected IR is a call to the llvm.x86.sse2.movmsk.pd
// intrinsic returning i32.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
812 
// _mm_mul_epu32: the expected IR masks each <2 x i64> operand down to its
// low 32 bits per lane and then multiplies the two vectors.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_epu32(A, B);
}
820 
// _mm_mul_pd: the expected IR is a plain fmul on <2 x double>.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
826 
// _mm_mul_sd: the expected IR extracts lane 0 of both operands, multiplies
// the two scalars, and inserts the product back into lane 0.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fmul double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_mul_sd(A, B);
}
835 
// _mm_mulhi_epi16: the expected IR is a call to the llvm.x86.sse2.pmulh.w
// intrinsic on two <8 x i16> operands.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
841 
// _mm_mulhi_epu16: the expected IR is a call to the llvm.x86.sse2.pmulhu.w
// intrinsic on two <8 x i16> operands.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
847 
// _mm_mullo_epi16: the expected IR is a plain mul on <8 x i16>.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
853 
// _mm_or_pd: the expected IR performs the bitwise OR on <2 x i64>, not on
// the floating-point vector type.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
859 
// _mm_or_si128: the expected IR is a bitwise OR on <2 x i64>.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
865 
// _mm_packs_epi16: the expected IR is a call to the
// llvm.x86.sse2.packsswb.128 intrinsic (<8 x i16> x 2 -> <16 x i8>).
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
871 
// _mm_packs_epi32: the expected IR is a call to the
// llvm.x86.sse2.packssdw.128 intrinsic (<4 x i32> x 2 -> <8 x i16>).
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
877 
// _mm_packus_epi16: the expected IR is a call to the
// llvm.x86.sse2.packuswb.128 intrinsic (<8 x i16> x 2 -> <16 x i8>).
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
883 
// _mm_pause: the expected IR is a call to the llvm.x86.sse2.pause intrinsic.
// The intrinsic is invoked as a plain statement: a void function must not
// return an expression in ISO C, and this matches test_mm_mfence.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}
889 
// _mm_sad_epu8: the expected IR is a call to the llvm.x86.sse2.psad.bw
// intrinsic (<16 x i8> x 2 -> <2 x i64>).
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
895 
// _mm_set_epi8: the expected IR builds a <16 x i8> from undef with sixteen
// insertelement instructions, one per lane, in lane order 0..15.
__m128i test_mm_set_epi8(char A, char B, char C, char D,
                         char E, char F, char G, char H,
                         char I, char J, char K, char L,
                         char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_set_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
919 
// _mm_set_epi16: the expected IR builds a <8 x i16> from undef with eight
// insertelement instructions in lane order 0..7.
__m128i test_mm_set_epi16(short A, short B, short C, short D,
                          short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_set_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set_epi16(A, B, C, D, E, F, G, H);
}
933 
// _mm_set_epi32: the expected IR builds a <4 x i32> from undef with four
// insertelement instructions in lane order 0..3.
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_set_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set_epi32(A, B, C, D);
}
942 
// _mm_set_epi64 (the __m64 operand form): the expected IR builds a
// <2 x i64> from undef with two insertelement instructions.
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_set_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64(A, B);
}
949 
// _mm_set_epi64x: the expected IR builds a <2 x i64> from undef with two
// insertelement instructions.
__m128i test_mm_set_epi64x(long long A, long long B) {
  // CHECK-LABEL: test_mm_set_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64x(A, B);
}
956 
// _mm_set_pd: the expected IR builds a <2 x double> from undef with two
// insertelement instructions.
__m128d test_mm_set_pd(double A, double B) {
  // CHECK-LABEL: test_mm_set_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd(A, B);
}
963 
// _mm_set_pd1: the expected IR builds a <2 x double> from undef with two
// insertelement instructions.
__m128d test_mm_set_pd1(double A) {
  // CHECK-LABEL: test_mm_set_pd1
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd1(A);
}
970 
// _mm_set_sd: the expected IR inserts the scalar into lane 0 and the
// constant 0.0 into lane 1 of a <2 x double>.
__m128d test_mm_set_sd(double A) {
  // CHECK-LABEL: test_mm_set_sd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
  return _mm_set_sd(A);
}
977 
// _mm_set1_epi8: the expected IR builds a <16 x i8> from undef with sixteen
// insertelement instructions in lane order 0..15.
__m128i test_mm_set1_epi8(char A) {
  // CHECK-LABEL: test_mm_set1_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set1_epi8(A);
}
998 
// _mm_set1_epi16: the expected IR builds a <8 x i16> from undef with eight
// insertelement instructions in lane order 0..7.
__m128i test_mm_set1_epi16(short A) {
  // CHECK-LABEL: test_mm_set1_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set1_epi16(A);
}
1011 
// _mm_set1_epi32: the expected IR builds a <4 x i32> from undef with four
// insertelement instructions in lane order 0..3.
__m128i test_mm_set1_epi32(int A) {
  // CHECK-LABEL: test_mm_set1_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set1_epi32(A);
}
1020 
// _mm_set1_epi64 (the __m64 operand form): the expected IR builds a
// <2 x i64> from undef with two insertelement instructions.
__m128i test_mm_set1_epi64(__m64 A) {
  // CHECK-LABEL: test_mm_set1_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64(A);
}
1027 
// _mm_set1_epi64x: the expected IR builds a <2 x i64> from undef with two
// insertelement instructions.
__m128i test_mm_set1_epi64x(long long A) {
  // CHECK-LABEL: test_mm_set1_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64x(A);
}
1034 
// _mm_set1_pd: the expected IR builds a <2 x double> from undef with two
// insertelement instructions.
__m128d test_mm_set1_pd(double A) {
  // CHECK-LABEL: test_mm_set1_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set1_pd(A);
}
1041 
// _mm_setr_epi8: the expected IR builds a <16 x i8> from undef with sixteen
// insertelement instructions in lane order 0..15.
__m128i test_mm_setr_epi8(char A, char B, char C, char D,
                          char E, char F, char G, char H,
                          char I, char J, char K, char L,
                          char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_setr_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
1065 
// _mm_setr_epi16: the expected IR builds a <8 x i16> from undef with eight
// insertelement instructions in lane order 0..7.
__m128i test_mm_setr_epi16(short A, short B, short C, short D,
                           short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_setr_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_setr_epi16(A, B, C, D, E, F, G, H);
}
1079 
// _mm_setr_epi32: the expected IR builds a <4 x i32> from undef with four
// insertelement instructions in lane order 0..3.
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_setr_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_setr_epi32(A, B, C, D);
}
1088 
// _mm_setr_epi64 (the __m64 operand form): the expected IR builds a
// <2 x i64> from undef with two insertelement instructions.
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_setr_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_setr_epi64(A, B);
}
1095 
// _mm_setr_pd: the expected IR builds a <2 x double> from undef with two
// insertelement instructions.
__m128d test_mm_setr_pd(double A, double B) {
  // CHECK-LABEL: test_mm_setr_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_setr_pd(A, B);
}
1102 
// _mm_setzero_pd: the expected IR stores a <2 x double> zeroinitializer.
// Declared with (void) so the definition is a proper prototype.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}
1108 
// _mm_setzero_si128: the expected IR stores a <2 x i64> zeroinitializer.
// Declared with (void) so the definition is a proper prototype.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}
1114 
// _mm_shuffle_epi32 with immediate 0: the expected IR is a <4 x i32>
// shufflevector whose mask is all zeros (every lane takes lane 0).
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
1120 
// _mm_shuffle_pd with immediate 1: the expected IR is a <2 x double>
// shufflevector with mask <1, 2> (lane 1 of the first operand, lane 0 of
// the second).
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
1126 
// _mm_shufflehi_epi16 with immediate 0: the expected IR is a <8 x i16>
// shufflevector that keeps lanes 0..3 and fills lanes 4..7 from lane 4.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
1132 
// _mm_shufflelo_epi16 with immediate 0: the expected IR is a <8 x i16>
// shufflevector that fills lanes 0..3 from lane 0 and keeps lanes 4..7.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
1138 
// _mm_sll_epi16: the expected IR is a call to the llvm.x86.sse2.psll.w
// intrinsic on two <8 x i16> operands.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}
1144 
// _mm_sll_epi32: the expected IR is a call to the llvm.x86.sse2.psll.d
// intrinsic on two <4 x i32> operands.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}
1150 
// _mm_sll_epi64: the expected IR is a call to the llvm.x86.sse2.psll.q
// intrinsic on two <2 x i64> operands.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}
1156 
// _mm_slli_epi16 by 1: the expected IR is a call to the
// llvm.x86.sse2.pslli.w intrinsic with an i32 shift count.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}
1162 
// _mm_slli_epi32 by 1: the expected IR is a call to the
// llvm.x86.sse2.pslli.d intrinsic with an i32 shift count.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}
1168 
// _mm_slli_epi64 by 1: the expected IR is a call to the
// llvm.x86.sse2.pslli.q intrinsic with an i32 shift count.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}
1174 
// _mm_slli_si128 by 5 bytes: the expected IR is a <16 x i8> shufflevector
// of a zero vector and the input, selecting indices 11..26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
1180 
// _mm_slli_si128 by 17 bytes (more than the vector width): the expected IR
// returns a <2 x i64> zeroinitializer directly.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_slli_si128(A, 17);
}
1186 
// _mm_sqrt_pd: the expected IR is a call to the generic llvm.sqrt.v2f64
// intrinsic.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
1192 
// _mm_sqrt_sd: the expected IR extracts lane 0, calls the scalar
// llvm.sqrt.f64 intrinsic, and inserts the result back into lane 0. The
// lane indices here are i64, unlike the i32 indices in the other tests.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.sqrt.f64(double {{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_sqrt_sd(A, B);
}
1200 
// _mm_sra_epi16: the expected IR is a call to the llvm.x86.sse2.psra.w
// intrinsic on two <8 x i16> operands.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}
1206 
// _mm_sra_epi32: the expected IR is a call to the llvm.x86.sse2.psra.d
// intrinsic on two <4 x i32> operands.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}
1212 
// _mm_srai_epi16 by 1: the expected IR is a call to the
// llvm.x86.sse2.psrai.w intrinsic with an i32 shift count.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}
1218 
// _mm_srai_epi32 by 1: the expected IR is a call to the
// llvm.x86.sse2.psrai.d intrinsic with an i32 shift count.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}
1224 
// _mm_srl_epi16: the expected IR is a call to the llvm.x86.sse2.psrl.w
// intrinsic on two <8 x i16> operands.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}
1230 
// _mm_srl_epi32: the expected IR is a call to the llvm.x86.sse2.psrl.d
// intrinsic on two <4 x i32> operands.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}
1236 
// _mm_srl_epi64: the expected IR is a call to the llvm.x86.sse2.psrl.q
// intrinsic on two <2 x i64> operands.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}
1242 
// _mm_srli_epi16 by 1: the expected IR is a call to the
// llvm.x86.sse2.psrli.w intrinsic with an i32 shift count.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}
1248 
// _mm_srli_epi32 by 1: the expected IR is a call to the
// llvm.x86.sse2.psrli.d intrinsic with an i32 shift count.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}
1254 
// _mm_srli_epi64 by 1: the expected IR is a call to the
// llvm.x86.sse2.psrli.q intrinsic with an i32 shift count.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}
1260 
// _mm_srli_si128 by 5 bytes: the expected IR is a <16 x i8> shufflevector
// of the input and a zero vector, selecting indices 5..20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
1266 
// _mm_srli_si128 by 17 bytes (more than the vector width): the expected IR
// returns a <2 x i64> zeroinitializer directly. The expectation line now
// carries the directive prefix so FileCheck actually verifies it.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_srli_si128(A, 17);
}
1272 
// _mm_store_pd: the expected IR is a 16-byte aligned <2 x double> store.
void test_mm_store_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
1278 
// _mm_store_pd1: the expected IR broadcasts lane 0 with a zero-mask
// shufflevector and then performs a 16-byte aligned <2 x double> store.
void test_mm_store_pd1(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}
1285 
// _mm_store_sd: the expected IR extracts lane 0 and stores it as a scalar
// double with alignment 1.
void test_mm_store_sd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
1292 
// _mm_store_si128: the expected IR is a 16-byte aligned <2 x i64> store.
void test_mm_store_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
1298 
// _mm_store1_pd: the expected IR broadcasts lane 0 with a zero-mask
// shufflevector and then performs a 16-byte aligned <2 x double> store.
void test_mm_store1_pd(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}
1305 
// _mm_storeh_pd: the expected IR extracts lane 1 and stores it as a scalar
// double with alignment 1.
void test_mm_storeh_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
1312 
// _mm_storel_epi64: the expected IR extracts lane 0 of a <2 x i64> and
// stores it through an i64 pointer with alignment 1. The wrapper takes the
// vector first and the destination second, the reverse of the intrinsic.
void test_mm_storel_epi64(__m128i x, void* y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}
1319 
// _mm_storel_pd: the expected IR extracts lane 0 and stores it as a scalar
// double with alignment 1.
void test_mm_storel_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
1326 
// _mm_storer_pd: the expected IR swaps the two lanes with a shufflevector
// and then performs a 16-byte aligned <2 x double> store. The wrapper takes
// the vector first and the destination second, the reverse of the intrinsic.
void test_mm_storer_pd(__m128d A, double* B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}
1333 
// _mm_storeu_pd: the expected IR is a <2 x double> store with alignment 1,
// immediately followed by the function's return.
void test_mm_storeu_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}
1340 
// _mm_storeu_si128: the expected IR is a <2 x i64> store with alignment 1,
// immediately followed by the function's return.
void test_mm_storeu_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}
1347 
// _mm_stream_pd: the expected IR is a 16-byte aligned <2 x double> store
// tagged with !nontemporal metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
1353 
// _mm_stream_si32: the expected IR is an i32 store with alignment 1 tagged
// with !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
1359 
// _mm_stream_si64: the expected IR is an i64 store with alignment 1 tagged
// with !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
1365 
// _mm_stream_si128: the expected IR is a 16-byte aligned <2 x i64> store
// tagged with !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
1371 
// _mm_sub_epi8: the expected IR is a plain sub on <16 x i8>.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
1377 
// _mm_sub_epi16: the expected IR is a plain sub on <8 x i16>.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
1383 
// _mm_sub_epi32: the expected IR is a plain sub on <4 x i32>.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
1389 
// _mm_sub_epi64: the expected IR is a plain sub on <2 x i64>.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
1395 
// _mm_sub_pd: the expected IR is a plain fsub on <2 x double>.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
1401 
// _mm_sub_sd: the expected IR extracts lane 0 of both operands, subtracts
// the two scalars, and inserts the difference back into lane 0.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}
1410 
// _mm_subs_epi8: the expected IR is a call to the llvm.x86.sse2.psubs.b
// intrinsic on two <16 x i8> operands.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}
1416 
// _mm_subs_epi16: the expected IR is a call to the llvm.x86.sse2.psubs.w
// intrinsic on two <8 x i16> operands.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}
1422 
// _mm_subs_epu8: the expected IR is a call to the llvm.x86.sse2.psubus.b
// intrinsic on two <16 x i8> operands.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}
1428 
// Codegen test for _mm_subs_epu16: the emitted IR is expected to call
// @llvm.x86.sse2.psubus.w on two <8 x i16> operands.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}
1434 
// Codegen test for _mm_ucomieq_sd: the emitted IR is expected to call
// @llvm.x86.sse2.ucomieq.sd on two <2 x double> operands, yielding an i32.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}
1440 
// Codegen test for _mm_ucomige_sd: the emitted IR is expected to call
// @llvm.x86.sse2.ucomige.sd on two <2 x double> operands, yielding an i32.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1446 
// Codegen test for _mm_ucomigt_sd: the emitted IR is expected to call
// @llvm.x86.sse2.ucomigt.sd on two <2 x double> operands, yielding an i32.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1452 
// Codegen test for _mm_ucomile_sd: the emitted IR is expected to call
// @llvm.x86.sse2.ucomile.sd on two <2 x double> operands, yielding an i32.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1458 
// Codegen test for _mm_ucomilt_sd: the emitted IR is expected to call
// @llvm.x86.sse2.ucomilt.sd on two <2 x double> operands, yielding an i32.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1464 
// Codegen test for _mm_ucomineq_sd: the emitted IR is expected to call
// @llvm.x86.sse2.ucomineq.sd on two <2 x double> operands, yielding an i32.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1470 
// Codegen test for _mm_undefined_pd: the emitted IR is expected to return a
// zeroinitializer <2 x double>.
// The parameter list is spelled `(void)` so the definition carries a real
// prototype; an empty `()` does not declare one before C23.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> zeroinitializer
  return _mm_undefined_pd();
}
1476 
// Codegen test for _mm_undefined_si128: the emitted IR is expected to return a
// zeroinitializer <2 x i64>.
// The parameter list is spelled `(void)` so the definition carries a real
// prototype; an empty `()` does not declare one before C23.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_undefined_si128();
}
1482 
// Codegen test for _mm_unpackhi_epi8: the emitted IR is expected to be a
// shufflevector on <16 x i8> whose mask alternates indices 8..15 of the first
// operand with indices 24..31 (elements 8..15 of the second operand).
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1488 
// Codegen test for _mm_unpackhi_epi16: the emitted IR is expected to be a
// shufflevector on <8 x i16> whose mask alternates indices 4..7 of the first
// operand with indices 12..15 (elements 4..7 of the second operand).
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1494 
// Codegen test for _mm_unpackhi_epi32: the emitted IR is expected to be a
// shufflevector on <4 x i32> with mask <2, 6, 3, 7>.
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1500 
// Codegen test for _mm_unpackhi_epi64: the emitted IR is expected to be a
// shufflevector on <2 x i64> with mask <1, 3>.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1506 
// Codegen test for _mm_unpackhi_pd: the emitted IR is expected to be a
// shufflevector on <2 x double> with mask <1, 3>.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1512 
// Codegen test for _mm_unpacklo_epi8: the emitted IR is expected to be a
// shufflevector on <16 x i8> whose mask alternates indices 0..7 of the first
// operand with indices 16..23 (elements 0..7 of the second operand).
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1518 
// Codegen test for _mm_unpacklo_epi16: the emitted IR is expected to be a
// shufflevector on <8 x i16> whose mask alternates indices 0..3 of the first
// operand with indices 8..11 (elements 0..3 of the second operand).
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1524 
// Codegen test for _mm_unpacklo_epi32: the emitted IR is expected to be a
// shufflevector on <4 x i32> with mask <0, 4, 1, 5>.
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1530 
// Codegen test for _mm_unpacklo_epi64: the emitted IR is expected to be a
// shufflevector on <2 x i64> with mask <0, 2>.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1536 
// Codegen test for _mm_unpacklo_pd: the emitted IR is expected to be a
// shufflevector on <2 x double> with mask <0, 2>.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1542 
// Codegen test for _mm_xor_pd: the emitted IR is expected to contain an `xor`
// on <2 x i64>, i.e. the double-precision operands are XORed as integers.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1548 
// Codegen test for _mm_xor_si128: the emitted IR is expected to contain an
// `xor` on <2 x i64>.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1554