1 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s
2 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
3
4
5 #include <immintrin.h>
6
7 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
8
test_mm_add_epi8(__m128i A,__m128i B)9 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
10 // CHECK-LABEL: test_mm_add_epi8
11 // CHECK: add <16 x i8>
12 return _mm_add_epi8(A, B);
13 }
14
test_mm_add_epi16(__m128i A,__m128i B)15 __m128i test_mm_add_epi16(__m128i A, __m128i B) {
16 // CHECK-LABEL: test_mm_add_epi16
17 // CHECK: add <8 x i16>
18 return _mm_add_epi16(A, B);
19 }
20
test_mm_add_epi32(__m128i A,__m128i B)21 __m128i test_mm_add_epi32(__m128i A, __m128i B) {
22 // CHECK-LABEL: test_mm_add_epi32
23 // CHECK: add <4 x i32>
24 return _mm_add_epi32(A, B);
25 }
26
test_mm_add_epi64(__m128i A,__m128i B)27 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
28 // CHECK-LABEL: test_mm_add_epi64
29 // CHECK: add <2 x i64>
30 return _mm_add_epi64(A, B);
31 }
32
test_mm_add_pd(__m128d A,__m128d B)33 __m128d test_mm_add_pd(__m128d A, __m128d B) {
34 // CHECK-LABEL: test_mm_add_pd
35 // CHECK: fadd <2 x double>
36 return _mm_add_pd(A, B);
37 }
38
test_mm_add_sd(__m128d A,__m128d B)39 __m128d test_mm_add_sd(__m128d A, __m128d B) {
40 // CHECK-LABEL: test_mm_add_sd
41 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
42 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
43 // CHECK: fadd double
44 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
45 return _mm_add_sd(A, B);
46 }
47
test_mm_adds_epi8(__m128i A,__m128i B)48 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
49 // CHECK-LABEL: test_mm_adds_epi8
50 // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
51 return _mm_adds_epi8(A, B);
52 }
53
test_mm_adds_epi16(__m128i A,__m128i B)54 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
55 // CHECK-LABEL: test_mm_adds_epi16
56 // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
57 return _mm_adds_epi16(A, B);
58 }
59
test_mm_adds_epu8(__m128i A,__m128i B)60 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
61 // CHECK-LABEL: test_mm_adds_epu8
62 // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
63 return _mm_adds_epu8(A, B);
64 }
65
test_mm_adds_epu16(__m128i A,__m128i B)66 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
67 // CHECK-LABEL: test_mm_adds_epu16
68 // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
69 return _mm_adds_epu16(A, B);
70 }
71
test_mm_and_pd(__m128d A,__m128d B)72 __m128d test_mm_and_pd(__m128d A, __m128d B) {
73 // CHECK-LABEL: test_mm_and_pd
74 // CHECK: and <2 x i64>
75 return _mm_and_pd(A, B);
76 }
77
test_mm_and_si128(__m128i A,__m128i B)78 __m128i test_mm_and_si128(__m128i A, __m128i B) {
79 // CHECK-LABEL: test_mm_and_si128
80 // CHECK: and <2 x i64>
81 return _mm_and_si128(A, B);
82 }
83
test_mm_andnot_pd(__m128d A,__m128d B)84 __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
85 // CHECK-LABEL: test_mm_andnot_pd
86 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
87 // CHECK: and <2 x i64>
88 return _mm_andnot_pd(A, B);
89 }
90
test_mm_andnot_si128(__m128i A,__m128i B)91 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
92 // CHECK-LABEL: test_mm_andnot_si128
93 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
94 // CHECK: and <2 x i64>
95 return _mm_andnot_si128(A, B);
96 }
97
// _mm_avg_epu8: the expected IR is the generic widen / add / add-one /
// shift-right / narrow sequence on sixteen 8-bit lanes; a call to the pavg.b
// target intrinsic must not appear before that sequence.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
  // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
  return _mm_avg_epu8(A, B);
}
109
test_mm_avg_epu16(__m128i A,__m128i B)110 __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
111 // CHECK-LABEL: test_mm_avg_epu16
112 // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
113 // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
114 // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
115 // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
116 // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
117 // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
118 // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
119 return _mm_avg_epu16(A, B);
120 }
121
test_mm_bslli_si128(__m128i A)122 __m128i test_mm_bslli_si128(__m128i A) {
123 // CHECK-LABEL: test_mm_bslli_si128
124 // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
125 return _mm_bslli_si128(A, 5);
126 }
127
test_mm_bsrli_si128(__m128i A)128 __m128i test_mm_bsrli_si128(__m128i A) {
129 // CHECK-LABEL: test_mm_bsrli_si128
130 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
131 return _mm_bsrli_si128(A, 5);
132 }
133
test_mm_castpd_ps(__m128d A)134 __m128 test_mm_castpd_ps(__m128d A) {
135 // CHECK-LABEL: test_mm_castpd_ps
136 // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
137 return _mm_castpd_ps(A);
138 }
139
test_mm_castpd_si128(__m128d A)140 __m128i test_mm_castpd_si128(__m128d A) {
141 // CHECK-LABEL: test_mm_castpd_si128
142 // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
143 return _mm_castpd_si128(A);
144 }
145
test_mm_castps_pd(__m128 A)146 __m128d test_mm_castps_pd(__m128 A) {
147 // CHECK-LABEL: test_mm_castps_pd
148 // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
149 return _mm_castps_pd(A);
150 }
151
test_mm_castps_si128(__m128 A)152 __m128i test_mm_castps_si128(__m128 A) {
153 // CHECK-LABEL: test_mm_castps_si128
154 // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
155 return _mm_castps_si128(A);
156 }
157
test_mm_castsi128_pd(__m128i A)158 __m128d test_mm_castsi128_pd(__m128i A) {
159 // CHECK-LABEL: test_mm_castsi128_pd
160 // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
161 return _mm_castsi128_pd(A);
162 }
163
test_mm_castsi128_ps(__m128i A)164 __m128 test_mm_castsi128_ps(__m128i A) {
165 // CHECK-LABEL: test_mm_castsi128_ps
166 // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
167 return _mm_castsi128_ps(A);
168 }
169
test_mm_clflush(void * A)170 void test_mm_clflush(void* A) {
171 // CHECK-LABEL: test_mm_clflush
172 // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
173 _mm_clflush(A);
174 }
175
test_mm_cmpeq_epi8(__m128i A,__m128i B)176 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
177 // CHECK-LABEL: test_mm_cmpeq_epi8
178 // CHECK: icmp eq <16 x i8>
179 return _mm_cmpeq_epi8(A, B);
180 }
181
test_mm_cmpeq_epi16(__m128i A,__m128i B)182 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
183 // CHECK-LABEL: test_mm_cmpeq_epi16
184 // CHECK: icmp eq <8 x i16>
185 return _mm_cmpeq_epi16(A, B);
186 }
187
test_mm_cmpeq_epi32(__m128i A,__m128i B)188 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
189 // CHECK-LABEL: test_mm_cmpeq_epi32
190 // CHECK: icmp eq <4 x i32>
191 return _mm_cmpeq_epi32(A, B);
192 }
193
test_mm_cmpeq_pd(__m128d A,__m128d B)194 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
195 // CHECK-LABEL: test_mm_cmpeq_pd
196 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
197 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
198 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
199 // CHECK-NEXT: ret <2 x double> [[BC]]
200 return _mm_cmpeq_pd(A, B);
201 }
202
test_mm_cmpeq_sd(__m128d A,__m128d B)203 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
204 // CHECK-LABEL: test_mm_cmpeq_sd
205 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
206 return _mm_cmpeq_sd(A, B);
207 }
208
// _mm_cmpge_pd: the expected predicate is `ole`, the same one
// test_mm_cmple_pd expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "A >= B" as "B <= A" -- confirm
// against emmintrin.h.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT: ret <2 x double> [[BC]]
  return _mm_cmpge_pd(A, B);
}
217
// _mm_cmpge_sd: expects the scalar compare intrinsic with immediate 2 (the
// immediate test_mm_cmple_sd expects), followed by a two-lane vector rebuilt
// from an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the compare runs with A and B swapped and lane 1
// is then restored from A -- confirm against emmintrin.h.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpge_sd(A, B);
}
227
test_mm_cmpgt_epi8(__m128i A,__m128i B)228 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
229 // CHECK-LABEL: test_mm_cmpgt_epi8
230 // CHECK: icmp sgt <16 x i8>
231 return _mm_cmpgt_epi8(A, B);
232 }
233
test_mm_cmpgt_epi16(__m128i A,__m128i B)234 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
235 // CHECK-LABEL: test_mm_cmpgt_epi16
236 // CHECK: icmp sgt <8 x i16>
237 return _mm_cmpgt_epi16(A, B);
238 }
239
test_mm_cmpgt_epi32(__m128i A,__m128i B)240 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
241 // CHECK-LABEL: test_mm_cmpgt_epi32
242 // CHECK: icmp sgt <4 x i32>
243 return _mm_cmpgt_epi32(A, B);
244 }
245
// _mm_cmpgt_pd: the expected predicate is `olt`, the same one
// test_mm_cmplt_pd expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "A > B" as "B < A" -- confirm
// against emmintrin.h.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT: ret <2 x double> [[BC]]
  return _mm_cmpgt_pd(A, B);
}
254
// _mm_cmpgt_sd: expects the scalar compare intrinsic with immediate 1 (the
// immediate test_mm_cmplt_sd expects), followed by a two-lane vector rebuilt
// from an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the compare runs with A and B swapped and lane 1
// is then restored from A -- confirm against emmintrin.h.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpgt_sd(A, B);
}
264
test_mm_cmple_pd(__m128d A,__m128d B)265 __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
266 // CHECK-LABEL: test_mm_cmple_pd
267 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
268 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
269 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
270 // CHECK-NEXT: ret <2 x double> [[BC]]
271 return _mm_cmple_pd(A, B);
272 }
273
test_mm_cmple_sd(__m128d A,__m128d B)274 __m128d test_mm_cmple_sd(__m128d A, __m128d B) {
275 // CHECK-LABEL: test_mm_cmple_sd
276 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
277 return _mm_cmple_sd(A, B);
278 }
279
// _mm_cmplt_epi8: the expected predicate is `sgt`, the same one
// test_mm_cmpgt_epi8 expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "A < B" as "B > A" -- confirm
// against emmintrin.h.
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}
285
// _mm_cmplt_epi16: the expected predicate is `sgt`, the same one
// test_mm_cmpgt_epi16 expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "A < B" as "B > A" -- confirm
// against emmintrin.h.
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}
291
// _mm_cmplt_epi32: the expected predicate is `sgt`, the same one
// test_mm_cmpgt_epi32 expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "A < B" as "B > A" -- confirm
// against emmintrin.h.
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}
297
test_mm_cmplt_pd(__m128d A,__m128d B)298 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
299 // CHECK-LABEL: test_mm_cmplt_pd
300 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
301 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
302 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
303 // CHECK-NEXT: ret <2 x double> [[BC]]
304 return _mm_cmplt_pd(A, B);
305 }
306
test_mm_cmplt_sd(__m128d A,__m128d B)307 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
308 // CHECK-LABEL: test_mm_cmplt_sd
309 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
310 return _mm_cmplt_sd(A, B);
311 }
312
test_mm_cmpneq_pd(__m128d A,__m128d B)313 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
314 // CHECK-LABEL: test_mm_cmpneq_pd
315 // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
316 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
317 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
318 // CHECK-NEXT: ret <2 x double> [[BC]]
319 return _mm_cmpneq_pd(A, B);
320 }
321
test_mm_cmpneq_sd(__m128d A,__m128d B)322 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
323 // CHECK-LABEL: test_mm_cmpneq_sd
324 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
325 return _mm_cmpneq_sd(A, B);
326 }
327
// _mm_cmpnge_pd: the expected predicate is `ugt`, the same one
// test_mm_cmpnle_pd expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "not (A >= B)" as
// "not (B <= A)" -- confirm against emmintrin.h.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT: ret <2 x double> [[BC]]
  return _mm_cmpnge_pd(A, B);
}
336
// _mm_cmpnge_sd: expects the scalar compare intrinsic with immediate 6 (the
// immediate test_mm_cmpnle_sd expects), followed by a two-lane vector rebuilt
// from an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the compare runs with A and B swapped and lane 1
// is then restored from A -- confirm against emmintrin.h.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpnge_sd(A, B);
}
346
// _mm_cmpngt_pd: the expected predicate is `uge`, the same one
// test_mm_cmpnlt_pd expects; the pattern does not pin operand order.
// NOTE(review): presumably the header lowers "not (A > B)" as
// "not (B < A)" -- confirm against emmintrin.h.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT: ret <2 x double> [[BC]]
  return _mm_cmpngt_pd(A, B);
}
355
// _mm_cmpngt_sd: expects the scalar compare intrinsic with immediate 5 (the
// immediate test_mm_cmpnlt_sd expects), followed by a two-lane vector rebuilt
// from an extracted lane 0 and an extracted lane 1.
// NOTE(review): presumably the compare runs with A and B swapped and lane 1
// is then restored from A -- confirm against emmintrin.h.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpngt_sd(A, B);
}
365
test_mm_cmpnle_pd(__m128d A,__m128d B)366 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
367 // CHECK-LABEL: test_mm_cmpnle_pd
368 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
369 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
370 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
371 // CHECK-NEXT: ret <2 x double> [[BC]]
372 return _mm_cmpnle_pd(A, B);
373 }
374
test_mm_cmpnle_sd(__m128d A,__m128d B)375 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
376 // CHECK-LABEL: test_mm_cmpnle_sd
377 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
378 return _mm_cmpnle_sd(A, B);
379 }
380
test_mm_cmpnlt_pd(__m128d A,__m128d B)381 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
382 // CHECK-LABEL: test_mm_cmpnlt_pd
383 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
384 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
385 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
386 // CHECK-NEXT: ret <2 x double> [[BC]]
387 return _mm_cmpnlt_pd(A, B);
388 }
389
test_mm_cmpnlt_sd(__m128d A,__m128d B)390 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
391 // CHECK-LABEL: test_mm_cmpnlt_sd
392 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
393 return _mm_cmpnlt_sd(A, B);
394 }
395
test_mm_cmpord_pd(__m128d A,__m128d B)396 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
397 // CHECK-LABEL: test_mm_cmpord_pd
398 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
399 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
400 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
401 // CHECK-NEXT: ret <2 x double> [[BC]]
402 return _mm_cmpord_pd(A, B);
403 }
404
test_mm_cmpord_sd(__m128d A,__m128d B)405 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
406 // CHECK-LABEL: test_mm_cmpord_sd
407 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
408 return _mm_cmpord_sd(A, B);
409 }
410
test_mm_cmpunord_pd(__m128d A,__m128d B)411 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
412 // CHECK-LABEL: test_mm_cmpunord_pd
413 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
414 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
415 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
416 // CHECK-NEXT: ret <2 x double> [[BC]]
417 return _mm_cmpunord_pd(A, B);
418 }
419
test_mm_cmpunord_sd(__m128d A,__m128d B)420 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
421 // CHECK-LABEL: test_mm_cmpunord_sd
422 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
423 return _mm_cmpunord_sd(A, B);
424 }
425
test_mm_comieq_sd(__m128d A,__m128d B)426 int test_mm_comieq_sd(__m128d A, __m128d B) {
427 // CHECK-LABEL: test_mm_comieq_sd
428 // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
429 return _mm_comieq_sd(A, B);
430 }
431
test_mm_comige_sd(__m128d A,__m128d B)432 int test_mm_comige_sd(__m128d A, __m128d B) {
433 // CHECK-LABEL: test_mm_comige_sd
434 // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
435 return _mm_comige_sd(A, B);
436 }
437
test_mm_comigt_sd(__m128d A,__m128d B)438 int test_mm_comigt_sd(__m128d A, __m128d B) {
439 // CHECK-LABEL: test_mm_comigt_sd
440 // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
441 return _mm_comigt_sd(A, B);
442 }
443
test_mm_comile_sd(__m128d A,__m128d B)444 int test_mm_comile_sd(__m128d A, __m128d B) {
445 // CHECK-LABEL: test_mm_comile_sd
446 // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
447 return _mm_comile_sd(A, B);
448 }
449
test_mm_comilt_sd(__m128d A,__m128d B)450 int test_mm_comilt_sd(__m128d A, __m128d B) {
451 // CHECK-LABEL: test_mm_comilt_sd
452 // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
453 return _mm_comilt_sd(A, B);
454 }
455
test_mm_comineq_sd(__m128d A,__m128d B)456 int test_mm_comineq_sd(__m128d A, __m128d B) {
457 // CHECK-LABEL: test_mm_comineq_sd
458 // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
459 return _mm_comineq_sd(A, B);
460 }
461
test_mm_cvtepi32_pd(__m128i A)462 __m128d test_mm_cvtepi32_pd(__m128i A) {
463 // CHECK-LABEL: test_mm_cvtepi32_pd
464 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
465 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
466 return _mm_cvtepi32_pd(A);
467 }
468
test_mm_cvtepi32_ps(__m128i A)469 __m128 test_mm_cvtepi32_ps(__m128i A) {
470 // CHECK-LABEL: test_mm_cvtepi32_ps
471 // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
472 return _mm_cvtepi32_ps(A);
473 }
474
test_mm_cvtpd_epi32(__m128d A)475 __m128i test_mm_cvtpd_epi32(__m128d A) {
476 // CHECK-LABEL: test_mm_cvtpd_epi32
477 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
478 return _mm_cvtpd_epi32(A);
479 }
480
test_mm_cvtpd_ps(__m128d A)481 __m128 test_mm_cvtpd_ps(__m128d A) {
482 // CHECK-LABEL: test_mm_cvtpd_ps
483 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
484 return _mm_cvtpd_ps(A);
485 }
486
test_mm_cvtps_epi32(__m128 A)487 __m128i test_mm_cvtps_epi32(__m128 A) {
488 // CHECK-LABEL: test_mm_cvtps_epi32
489 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
490 return _mm_cvtps_epi32(A);
491 }
492
test_mm_cvtps_pd(__m128 A)493 __m128d test_mm_cvtps_pd(__m128 A) {
494 // CHECK-LABEL: test_mm_cvtps_pd
495 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
496 // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
497 return _mm_cvtps_pd(A);
498 }
499
test_mm_cvtsd_f64(__m128d A)500 double test_mm_cvtsd_f64(__m128d A) {
501 // CHECK-LABEL: test_mm_cvtsd_f64
502 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
503 return _mm_cvtsd_f64(A);
504 }
505
test_mm_cvtsd_si32(__m128d A)506 int test_mm_cvtsd_si32(__m128d A) {
507 // CHECK-LABEL: test_mm_cvtsd_si32
508 // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
509 return _mm_cvtsd_si32(A);
510 }
511
test_mm_cvtsd_si64(__m128d A)512 long long test_mm_cvtsd_si64(__m128d A) {
513 // CHECK-LABEL: test_mm_cvtsd_si64
514 // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
515 return _mm_cvtsd_si64(A);
516 }
517
test_mm_cvtsd_ss(__m128 A,__m128d B)518 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
519 // CHECK-LABEL: test_mm_cvtsd_ss
520 // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
521 return _mm_cvtsd_ss(A, B);
522 }
523
test_mm_cvtsi128_si32(__m128i A)524 int test_mm_cvtsi128_si32(__m128i A) {
525 // CHECK-LABEL: test_mm_cvtsi128_si32
526 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
527 return _mm_cvtsi128_si32(A);
528 }
529
test_mm_cvtsi128_si64(__m128i A)530 long long test_mm_cvtsi128_si64(__m128i A) {
531 // CHECK-LABEL: test_mm_cvtsi128_si64
532 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
533 return _mm_cvtsi128_si64(A);
534 }
535
test_mm_cvtsi32_sd(__m128d A,int B)536 __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
537 // CHECK-LABEL: test_mm_cvtsi32_sd
538 // CHECK: sitofp i32 %{{.*}} to double
539 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
540 return _mm_cvtsi32_sd(A, B);
541 }
542
test_mm_cvtsi32_si128(int A)543 __m128i test_mm_cvtsi32_si128(int A) {
544 // CHECK-LABEL: test_mm_cvtsi32_si128
545 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
546 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
547 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
548 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
549 return _mm_cvtsi32_si128(A);
550 }
551
test_mm_cvtsi64_sd(__m128d A,long long B)552 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
553 // CHECK-LABEL: test_mm_cvtsi64_sd
554 // CHECK: sitofp i64 %{{.*}} to double
555 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
556 return _mm_cvtsi64_sd(A, B);
557 }
558
test_mm_cvtsi64_si128(long long A)559 __m128i test_mm_cvtsi64_si128(long long A) {
560 // CHECK-LABEL: test_mm_cvtsi64_si128
561 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
562 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
563 return _mm_cvtsi64_si128(A);
564 }
565
test_mm_cvtss_sd(__m128d A,__m128 B)566 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
567 // CHECK-LABEL: test_mm_cvtss_sd
568 // CHECK: extractelement <4 x float> %{{.*}}, i32 0
569 // CHECK: fpext float %{{.*}} to double
570 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
571 return _mm_cvtss_sd(A, B);
572 }
573
test_mm_cvttpd_epi32(__m128d A)574 __m128i test_mm_cvttpd_epi32(__m128d A) {
575 // CHECK-LABEL: test_mm_cvttpd_epi32
576 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
577 return _mm_cvttpd_epi32(A);
578 }
579
test_mm_cvttps_epi32(__m128 A)580 __m128i test_mm_cvttps_epi32(__m128 A) {
581 // CHECK-LABEL: test_mm_cvttps_epi32
582 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
583 return _mm_cvttps_epi32(A);
584 }
585
test_mm_cvttsd_si32(__m128d A)586 int test_mm_cvttsd_si32(__m128d A) {
587 // CHECK-LABEL: test_mm_cvttsd_si32
588 // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
589 return _mm_cvttsd_si32(A);
590 }
591
test_mm_cvttsd_si64(__m128d A)592 long long test_mm_cvttsd_si64(__m128d A) {
593 // CHECK-LABEL: test_mm_cvttsd_si64
594 // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
595 return _mm_cvttsd_si64(A);
596 }
597
test_mm_div_pd(__m128d A,__m128d B)598 __m128d test_mm_div_pd(__m128d A, __m128d B) {
599 // CHECK-LABEL: test_mm_div_pd
600 // CHECK: fdiv <2 x double>
601 return _mm_div_pd(A, B);
602 }
603
test_mm_div_sd(__m128d A,__m128d B)604 __m128d test_mm_div_sd(__m128d A, __m128d B) {
605 // CHECK-LABEL: test_mm_div_sd
606 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
607 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
608 // CHECK: fdiv double
609 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
610 return _mm_div_sd(A, B);
611 }
612
613 // Lowering to pextrw requires optimization.
test_mm_extract_epi16(__m128i A)614 int test_mm_extract_epi16(__m128i A) {
615 // CHECK-LABEL: test_mm_extract_epi16
616 // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
617 // CHECK: zext i16 %{{.*}} to i32
618 return _mm_extract_epi16(A, 1);
619 }
620
test_mm_insert_epi16(__m128i A,int B)621 __m128i test_mm_insert_epi16(__m128i A, int B) {
622 // CHECK-LABEL: test_mm_insert_epi16
623 // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
624 return _mm_insert_epi16(A, B, 0);
625 }
626
// _mm_lfence: expects a call to the SSE2 lfence target intrinsic.
// Declared with (void) so the definition is a real prototype; an empty
// parameter list in C leaves the arguments unchecked.
void test_mm_lfence(void) {
  // CHECK-LABEL: test_mm_lfence
  // CHECK: call void @llvm.x86.sse2.lfence()
  _mm_lfence();
}
632
test_mm_load_pd(double const * A)633 __m128d test_mm_load_pd(double const* A) {
634 // CHECK-LABEL: test_mm_load_pd
635 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
636 return _mm_load_pd(A);
637 }
638
test_mm_load_pd1(double const * A)639 __m128d test_mm_load_pd1(double const* A) {
640 // CHECK-LABEL: test_mm_load_pd1
641 // CHECK: load double, double* %{{.*}}, align 8
642 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
643 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
644 return _mm_load_pd1(A);
645 }
646
test_mm_load_sd(double const * A)647 __m128d test_mm_load_sd(double const* A) {
648 // CHECK-LABEL: test_mm_load_sd
649 // CHECK: load double, double* %{{.*}}, align 1{{$}}
650 return _mm_load_sd(A);
651 }
652
test_mm_load_si128(__m128i const * A)653 __m128i test_mm_load_si128(__m128i const* A) {
654 // CHECK-LABEL: test_mm_load_si128
655 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
656 return _mm_load_si128(A);
657 }
658
test_mm_load1_pd(double const * A)659 __m128d test_mm_load1_pd(double const* A) {
660 // CHECK-LABEL: test_mm_load1_pd
661 // CHECK: load double, double* %{{.*}}, align 8
662 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
663 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
664 return _mm_load1_pd(A);
665 }
666
test_mm_loadh_pd(__m128d x,void * y)667 __m128d test_mm_loadh_pd(__m128d x, void* y) {
668 // CHECK-LABEL: test_mm_loadh_pd
669 // CHECK: load double, double* %{{.*}}, align 1{{$}}
670 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
671 return _mm_loadh_pd(x, y);
672 }
673
// _mm_loadl_epi64: expects a 64-bit load with alignment 1 placed in lane 0
// and a constant zero inserted into lane 1.
// The function-name directive uses the LABEL form, like every other test in
// this file, so a mismatch here cannot bleed into a neighbouring function.
__m128i test_mm_loadl_epi64(__m128i* y) {
  // CHECK-LABEL: test_mm_loadl_epi64
  // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
  return _mm_loadl_epi64(y);
}
681
test_mm_loadl_pd(__m128d x,void * y)682 __m128d test_mm_loadl_pd(__m128d x, void* y) {
683 // CHECK-LABEL: test_mm_loadl_pd
684 // CHECK: load double, double* %{{.*}}, align 1{{$}}
685 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
686 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
687 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
688 return _mm_loadl_pd(x, y);
689 }
690
test_mm_loadr_pd(double const * A)691 __m128d test_mm_loadr_pd(double const* A) {
692 // CHECK-LABEL: test_mm_loadr_pd
693 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
694 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
695 return _mm_loadr_pd(A);
696 }
697
test_mm_loadu_pd(double const * A)698 __m128d test_mm_loadu_pd(double const* A) {
699 // CHECK-LABEL: test_mm_loadu_pd
700 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
701 return _mm_loadu_pd(A);
702 }
703
test_mm_loadu_si128(__m128i const * A)704 __m128i test_mm_loadu_si128(__m128i const* A) {
705 // CHECK-LABEL: test_mm_loadu_si128
706 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
707 return _mm_loadu_si128(A);
708 }
709
test_mm_loadu_si64(void const * A)710 __m128i test_mm_loadu_si64(void const* A) {
711 // CHECK-LABEL: test_mm_loadu_si64
712 // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
713 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
714 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
715 return _mm_loadu_si64(A);
716 }
717
test_mm_madd_epi16(__m128i A,__m128i B)718 __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
719 // CHECK-LABEL: test_mm_madd_epi16
720 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
721 return _mm_madd_epi16(A, B);
722 }
723
test_mm_maskmoveu_si128(__m128i A,__m128i B,char * C)724 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
725 // CHECK-LABEL: test_mm_maskmoveu_si128
726 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
727 _mm_maskmoveu_si128(A, B, C);
728 }
729
test_mm_max_epi16(__m128i A,__m128i B)730 __m128i test_mm_max_epi16(__m128i A, __m128i B) {
731 // CHECK-LABEL: test_mm_max_epi16
732 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
733 // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
734 return _mm_max_epi16(A, B);
735 }
736
test_mm_max_epu8(__m128i A,__m128i B)737 __m128i test_mm_max_epu8(__m128i A, __m128i B) {
738 // CHECK-LABEL: test_mm_max_epu8
739 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
740 // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
741 return _mm_max_epu8(A, B);
742 }
743
test_mm_max_pd(__m128d A,__m128d B)744 __m128d test_mm_max_pd(__m128d A, __m128d B) {
745 // CHECK-LABEL: test_mm_max_pd
746 // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
747 return _mm_max_pd(A, B);
748 }
749
test_mm_max_sd(__m128d A,__m128d B)750 __m128d test_mm_max_sd(__m128d A, __m128d B) {
751 // CHECK-LABEL: test_mm_max_sd
752 // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
753 return _mm_max_sd(A, B);
754 }
755
// _mm_mfence: expects a call to the SSE2 mfence target intrinsic.
// Declared with (void) so the definition is a real prototype; an empty
// parameter list in C leaves the arguments unchecked.
void test_mm_mfence(void) {
  // CHECK-LABEL: test_mm_mfence
  // CHECK: call void @llvm.x86.sse2.mfence()
  _mm_mfence();
}
761
// Verifies that _mm_min_epi16 is emitted as generic IR: a signed less-than
// compare on <8 x i16> immediately followed by a select that picks between
// the same two compared operands.
__m128i test_mm_min_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epi16
  // CHECK:       [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_min_epi16(A, B);
}
768
// Verifies that _mm_min_epu8 is emitted as generic IR: an unsigned less-than
// compare on <16 x i8> immediately followed by a select that picks between
// the same two compared operands.
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_min_epu8
  // CHECK:       [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epu8(A, B);
}
775
// Verifies that _mm_min_pd is emitted as a call to the llvm.x86.sse2.min.pd
// intrinsic on two <2 x double> operands.
__m128d test_mm_min_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_pd(A, B);
}
781
// Verifies that _mm_min_sd is emitted as a call to the llvm.x86.sse2.min.sd
// intrinsic on two <2 x double> operands.
__m128d test_mm_min_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_min_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_min_sd(A, B);
}
787
// Verifies that _mm_move_epi64 is emitted as a <2 x i64> shufflevector with
// the mask <0, 2> (lane 0 of the first operand, lane 0 of the second).
__m128i test_mm_move_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_move_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_move_epi64(A);
}
793
// Verifies that _mm_move_sd is emitted as generic IR: lane 0 is extracted
// from one <2 x double> and inserted into lane 0 of another.
__m128d test_mm_move_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_move_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_move_sd(A, B);
}
800
// Verifies that _mm_movemask_epi8 is emitted as a call to the
// llvm.x86.sse2.pmovmskb.128 intrinsic, taking <16 x i8> and returning i32.
int test_mm_movemask_epi8(__m128i A) {
  // CHECK-LABEL: test_mm_movemask_epi8
  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
  return _mm_movemask_epi8(A);
}
806
// Verifies that _mm_movemask_pd is emitted as a call to the
// llvm.x86.sse2.movmsk.pd intrinsic, taking <2 x double> and returning i32.
int test_mm_movemask_pd(__m128d A) {
  // CHECK-LABEL: test_mm_movemask_pd
  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
  return _mm_movemask_pd(A);
}
812
// Verifies that _mm_mul_epu32 is emitted as generic IR: each <2 x i64>
// operand is masked with 4294967295 (keeping the low 32 bits of every lane)
// and the masked values are then multiplied as <2 x i64>.
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_epu32(A, B);
}
820
// Verifies that _mm_mul_pd is emitted as a plain fmul on <2 x double>.
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_pd
  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
  return _mm_mul_pd(A, B);
}
826
// Verifies that _mm_mul_sd is emitted as generic scalar IR: lane 0 is
// extracted from two <2 x double> values, multiplied with a scalar fmul, and
// the product is inserted back into lane 0.
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_mul_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fmul double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_mul_sd(A, B);
}
835
// Verifies that _mm_mulhi_epi16 is emitted as a call to the
// llvm.x86.sse2.pmulh.w intrinsic on two <8 x i16> operands.
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epi16(A, B);
}
841
// Verifies that _mm_mulhi_epu16 is emitted as a call to the
// llvm.x86.sse2.pmulhu.w intrinsic on two <8 x i16> operands.
__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mulhi_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_mulhi_epu16(A, B);
}
847
// Verifies that _mm_mullo_epi16 is emitted as a plain mul on <8 x i16>.
__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mullo_epi16
  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
  return _mm_mullo_epi16(A, B);
}
853
// Verifies that _mm_or_pd is emitted as a bitwise or on <2 x i64>, i.e. the
// double-precision operands are combined as integer lanes.
__m128d test_mm_or_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_or_pd
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_pd(A, B);
}
859
// Verifies that _mm_or_si128 is emitted as a plain or on <2 x i64>.
__m128i test_mm_or_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_or_si128
  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
  return _mm_or_si128(A, B);
}
865
// Verifies that _mm_packs_epi16 is emitted as a call to the
// llvm.x86.sse2.packsswb.128 intrinsic: two <8 x i16> in, <16 x i8> out.
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packs_epi16(A, B);
}
871
// Verifies that _mm_packs_epi32 is emitted as a call to the
// llvm.x86.sse2.packssdw.128 intrinsic: two <4 x i32> in, <8 x i16> out.
__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packs_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packs_epi32(A, B);
}
877
// Verifies that _mm_packus_epi16 is emitted as a call to the
// llvm.x86.sse2.packuswb.128 intrinsic: two <8 x i16> in, <16 x i8> out.
__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_packus_epi16
  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_packus_epi16(A, B);
}
883
// Verifies that _mm_pause is emitted as a call to the llvm.x86.sse2.pause
// intrinsic. The intrinsic yields no value, so it is invoked as a plain
// statement (a void function may not return an expression in ISO C), and the
// function uses a (void) parameter list so that it has a proper prototype.
void test_mm_pause(void) {
  // CHECK-LABEL: test_mm_pause
  // CHECK: call void @llvm.x86.sse2.pause()
  _mm_pause();
}
889
// Verifies that _mm_sad_epu8 is emitted as a call to the
// llvm.x86.sse2.psad.bw intrinsic: two <16 x i8> in, <2 x i64> out.
__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sad_epu8
  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sad_epu8(A, B);
}
895
// Verifies that _mm_set_epi8 builds its result with sixteen insertelement
// instructions that fill lanes 0 through 15 of a <16 x i8>, starting from
// undef.
__m128i test_mm_set_epi8(char A, char B, char C, char D,
                         char E, char F, char G, char H,
                         char I, char J, char K, char L,
                         char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_set_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
919
// Verifies that _mm_set_epi16 builds its result with eight insertelement
// instructions that fill lanes 0 through 7 of an <8 x i16>, starting from
// undef.
__m128i test_mm_set_epi16(short A, short B, short C, short D,
                          short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_set_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set_epi16(A, B, C, D, E, F, G, H);
}
933
// Verifies that _mm_set_epi32 builds its result with four insertelement
// instructions that fill lanes 0 through 3 of a <4 x i32>, starting from
// undef.
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_set_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set_epi32(A, B, C, D);
}
942
// Verifies that _mm_set_epi64 (taking two __m64 values) builds its result
// with two insertelement instructions filling lanes 0 and 1 of a <2 x i64>,
// starting from undef.
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_set_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64(A, B);
}
949
// Verifies that _mm_set_epi64x (taking two long long values) builds its
// result with two insertelement instructions filling lanes 0 and 1 of a
// <2 x i64>, starting from undef.
__m128i test_mm_set_epi64x(long long A, long long B) {
  // CHECK-LABEL: test_mm_set_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set_epi64x(A, B);
}
956
// Verifies that _mm_set_pd builds its result with two insertelement
// instructions filling lanes 0 and 1 of a <2 x double>, starting from undef.
__m128d test_mm_set_pd(double A, double B) {
  // CHECK-LABEL: test_mm_set_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd(A, B);
}
963
// Verifies that _mm_set_pd1 (single scalar argument) builds its result with
// two insertelement instructions filling lanes 0 and 1 of a <2 x double>,
// starting from undef.
__m128d test_mm_set_pd1(double A) {
  // CHECK-LABEL: test_mm_set_pd1
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set_pd1(A);
}
970
// Verifies that _mm_set_sd inserts a value into lane 0 of a <2 x double>
// (starting from undef) and the constant 0.0 into lane 1.
__m128d test_mm_set_sd(double A) {
  // CHECK-LABEL: test_mm_set_sd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
  return _mm_set_sd(A);
}
977
// Verifies that _mm_set1_epi8 (single scalar argument) builds its result
// with sixteen insertelement instructions that fill lanes 0 through 15 of a
// <16 x i8>, starting from undef.
__m128i test_mm_set1_epi8(char A) {
  // CHECK-LABEL: test_mm_set1_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_set1_epi8(A);
}
998
// Verifies that _mm_set1_epi16 (single scalar argument) builds its result
// with eight insertelement instructions that fill lanes 0 through 7 of an
// <8 x i16>, starting from undef.
__m128i test_mm_set1_epi16(short A) {
  // CHECK-LABEL: test_mm_set1_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_set1_epi16(A);
}
1011
// Verifies that _mm_set1_epi32 (single scalar argument) builds its result
// with four insertelement instructions that fill lanes 0 through 3 of a
// <4 x i32>, starting from undef.
__m128i test_mm_set1_epi32(int A) {
  // CHECK-LABEL: test_mm_set1_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_set1_epi32(A);
}
1020
// Verifies that _mm_set1_epi64 (single __m64 argument) builds its result
// with two insertelement instructions filling lanes 0 and 1 of a <2 x i64>,
// starting from undef.
__m128i test_mm_set1_epi64(__m64 A) {
  // CHECK-LABEL: test_mm_set1_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64(A);
}
1027
// Verifies that _mm_set1_epi64x (single long long argument) builds its
// result with two insertelement instructions filling lanes 0 and 1 of a
// <2 x i64>, starting from undef.
__m128i test_mm_set1_epi64x(long long A) {
  // CHECK-LABEL: test_mm_set1_epi64x
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_set1_epi64x(A);
}
1034
// Verifies that _mm_set1_pd (single scalar argument) builds its result with
// two insertelement instructions filling lanes 0 and 1 of a <2 x double>,
// starting from undef.
__m128d test_mm_set1_pd(double A) {
  // CHECK-LABEL: test_mm_set1_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_set1_pd(A);
}
1041
// Verifies that _mm_setr_epi8 builds its result with sixteen insertelement
// instructions that fill lanes 0 through 15 of a <16 x i8>, starting from
// undef.
__m128i test_mm_setr_epi8(char A, char B, char C, char D,
                          char E, char F, char G, char H,
                          char I, char J, char K, char L,
                          char M, char N, char O, char P) {
  // CHECK-LABEL: test_mm_setr_epi8
  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
  return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
1065
// Verifies that _mm_setr_epi16 builds its result with eight insertelement
// instructions that fill lanes 0 through 7 of an <8 x i16>, starting from
// undef.
__m128i test_mm_setr_epi16(short A, short B, short C, short D,
                           short E, short F, short G, short H) {
  // CHECK-LABEL: test_mm_setr_epi16
  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
  return _mm_setr_epi16(A, B, C, D, E, F, G, H);
}
1079
// Verifies that _mm_setr_epi32 builds its result with four insertelement
// instructions that fill lanes 0 through 3 of a <4 x i32>, starting from
// undef.
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_setr_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_setr_epi32(A, B, C, D);
}
1088
// Verifies that _mm_setr_epi64 (taking two __m64 values) builds its result
// with two insertelement instructions filling lanes 0 and 1 of a <2 x i64>,
// starting from undef.
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_setr_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_setr_epi64(A, B);
}
1095
// Verifies that _mm_setr_pd builds its result with two insertelement
// instructions filling lanes 0 and 1 of a <2 x double>, starting from undef.
__m128d test_mm_setr_pd(double A, double B) {
  // CHECK-LABEL: test_mm_setr_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_setr_pd(A, B);
}
1102
// Verifies that _mm_setzero_pd is emitted as a store of a <2 x double>
// zeroinitializer. The function is declared with a (void) parameter list so
// that it has a proper C prototype.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}
1108
// Verifies that _mm_setzero_si128 is emitted as a store of a <2 x i64>
// zeroinitializer. The function is declared with a (void) parameter list so
// that it has a proper C prototype.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}
1114
// Verifies that _mm_shuffle_epi32 with immediate 0 is emitted as a
// <4 x i32> shufflevector against undef with an all-zero mask.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
1120
// Verifies that _mm_shuffle_pd with immediate 1 is emitted as a
// <2 x double> shufflevector with the mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
1126
// Verifies that _mm_shufflehi_epi16 with immediate 0 is emitted as an
// <8 x i16> shufflevector whose mask keeps lanes 0-3 in place and selects
// lane 4 for each of lanes 4-7.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
1132
// Verifies that _mm_shufflelo_epi16 with immediate 0 is emitted as an
// <8 x i16> shufflevector whose mask selects lane 0 for each of lanes 0-3
// and keeps lanes 4-7 in place.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
1138
// Verifies that _mm_sll_epi16 is emitted as a call to the
// llvm.x86.sse2.psll.w intrinsic on two <8 x i16> operands.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}
1144
// Verifies that _mm_sll_epi32 is emitted as a call to the
// llvm.x86.sse2.psll.d intrinsic on two <4 x i32> operands.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}
1150
// Verifies that _mm_sll_epi64 is emitted as a call to the
// llvm.x86.sse2.psll.q intrinsic on two <2 x i64> operands.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}
1156
// Verifies that _mm_slli_epi16 with a constant count is emitted as a call
// to the llvm.x86.sse2.pslli.w intrinsic taking <8 x i16> and an i32 count.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}
1162
// Verifies that _mm_slli_epi32 with a constant count is emitted as a call
// to the llvm.x86.sse2.pslli.d intrinsic taking <4 x i32> and an i32 count.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}
1168
// Verifies that _mm_slli_epi64 with a constant count is emitted as a call
// to the llvm.x86.sse2.pslli.q intrinsic taking <2 x i64> and an i32 count.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}
1174
// Verifies that a 5-byte _mm_slli_si128 is emitted as a <16 x i8>
// shufflevector of a zero vector (first operand) and the input (second
// operand) using the consecutive indices 11 through 26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
1180
// Verifies that _mm_slli_si128 with a byte count of 17 (larger than the
// 16-byte vector) is emitted as a direct return of an all-zero <2 x i64>.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_slli_si128(A, 17);
}
1186
// Verifies that _mm_sqrt_pd is emitted as a call to the generic
// llvm.sqrt.v2f64 intrinsic.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
1192
// Verifies that _mm_sqrt_sd is emitted as scalar IR: lane 0 is extracted,
// passed to the generic llvm.sqrt.f64 intrinsic, and the result is inserted
// into lane 0. The lane indices in this test are i64 constants.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.sqrt.f64(double {{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_sqrt_sd(A, B);
}
1200
// Verifies that _mm_sra_epi16 is emitted as a call to the
// llvm.x86.sse2.psra.w intrinsic on two <8 x i16> operands.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}
1206
// Verifies that _mm_sra_epi32 is emitted as a call to the
// llvm.x86.sse2.psra.d intrinsic on two <4 x i32> operands.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}
1212
// Verifies that _mm_srai_epi16 with a constant count is emitted as a call
// to the llvm.x86.sse2.psrai.w intrinsic taking <8 x i16> and an i32 count.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}
1218
// Verifies that _mm_srai_epi32 with a constant count is emitted as a call
// to the llvm.x86.sse2.psrai.d intrinsic taking <4 x i32> and an i32 count.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}
1224
// Verifies that _mm_srl_epi16 is emitted as a call to the
// llvm.x86.sse2.psrl.w intrinsic on two <8 x i16> operands.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}
1230
// Verifies that _mm_srl_epi32 is emitted as a call to the
// llvm.x86.sse2.psrl.d intrinsic on two <4 x i32> operands.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}
1236
// Verifies that _mm_srl_epi64 is emitted as a call to the
// llvm.x86.sse2.psrl.q intrinsic on two <2 x i64> operands.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}
1242
// Verifies that _mm_srli_epi16 with a constant count is emitted as a call
// to the llvm.x86.sse2.psrli.w intrinsic taking <8 x i16> and an i32 count.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}
1248
// Verifies that _mm_srli_epi32 with a constant count is emitted as a call
// to the llvm.x86.sse2.psrli.d intrinsic taking <4 x i32> and an i32 count.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}
1254
// Verifies that _mm_srli_epi64 with a constant count is emitted as a call
// to the llvm.x86.sse2.psrli.q intrinsic taking <2 x i64> and an i32 count.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}
1260
// Verifies that a 5-byte _mm_srli_si128 is emitted as a <16 x i8>
// shufflevector of the input (first operand) and a zero vector (second
// operand) using the consecutive indices 5 through 20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
1266
// Verifies that _mm_srli_si128 with a byte count of 17 (larger than the
// 16-byte vector) is emitted as a direct return of an all-zero <2 x i64>,
// mirroring the left-shift counterpart test_mm_slli_si128_2.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_srli_si128(A, 17);
}
1272
// Verifies that _mm_store_pd is emitted as a 16-byte-aligned store of a
// <2 x double>.
void test_mm_store_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
1278
// Verifies that _mm_store_pd1 is emitted as a <2 x double> shufflevector
// with an all-zero mask (lane 0 in both result lanes) followed by a
// 16-byte-aligned store of the shuffled vector type.
void test_mm_store_pd1(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}
1285
// Verifies that _mm_store_sd extracts lane 0 of a <2 x double> and stores
// it as a double with alignment 1. The end-of-line anchor on the store
// pattern rejects any larger alignment that merely starts with "1".
void test_mm_store_sd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
1292
// Verifies that _mm_store_si128 is emitted as a 16-byte-aligned store of a
// <2 x i64>.
void test_mm_store_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
1298
// Verifies that _mm_store1_pd is emitted as a <2 x double> shufflevector
// with an all-zero mask (lane 0 in both result lanes) followed by a
// 16-byte-aligned <2 x double> store.
void test_mm_store1_pd(double* x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}
1305
// Verifies that _mm_storeh_pd extracts lane 1 of a <2 x double> and stores
// it as a double with alignment 1 (the store pattern is anchored at end of
// line so a larger alignment cannot match).
void test_mm_storeh_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
1312
// Verifies that _mm_storel_epi64 extracts lane 0 of a <2 x i64> and stores
// through an i64* with alignment 1 (anchored at end of line). Note that the
// test takes the vector first and the pointer second, the reverse of the
// argument order passed to the intrinsic.
void test_mm_storel_epi64(__m128i x, void* y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}
1319
// Verifies that _mm_storel_pd extracts lane 0 of a <2 x double> and stores
// it as a double with alignment 1 (the store pattern is anchored at end of
// line so a larger alignment cannot match).
void test_mm_storel_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
1326
// Verifies that _mm_storer_pd is emitted as a <2 x double> shufflevector
// with the mask <1, 0> (lanes swapped) followed by a store through a
// <2 x double>* with alignment exactly 16. Note that the test takes the
// vector first and the pointer second, the reverse of the argument order
// passed to the intrinsic.
void test_mm_storer_pd(__m128d A, double* B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}
1333
// Verifies that _mm_storeu_pd is emitted as a store through a
// <2 x double>* with alignment 1 (anchored at end of line), and that the
// very next instruction is the function's ret void.
void test_mm_storeu_pd(double* A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}
1340
// Verifies that _mm_storeu_si128 is emitted as a <2 x i64> store with
// alignment 1 (anchored at end of line), and that the very next instruction
// is the function's ret void.
void test_mm_storeu_si128(__m128i* A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}
1347
// Verifies that _mm_stream_pd is emitted as a 16-byte-aligned <2 x double>
// store carrying !nontemporal metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
1353
// Verifies that _mm_stream_si32 is emitted as an i32 store with alignment 1
// carrying !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
1359
// Verifies that _mm_stream_si64 is emitted as an i64 store with alignment 1
// carrying !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
1365
// Verifies that _mm_stream_si128 is emitted as a 16-byte-aligned <2 x i64>
// store carrying !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
1371
// Verifies that _mm_sub_epi8 is emitted as a plain sub on <16 x i8>.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
1377
// Verifies that _mm_sub_epi16 is emitted as a plain sub on <8 x i16>.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
1383
// Verifies that _mm_sub_epi32 is emitted as a plain sub on <4 x i32>.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
1389
// Verifies that _mm_sub_epi64 is emitted as a plain sub on <2 x i64>.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
1395
// Verifies that _mm_sub_pd is emitted as a plain fsub on <2 x double>.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
1401
// Verifies that _mm_sub_sd is emitted as generic scalar IR: lane 0 is
// extracted from two <2 x double> values, subtracted with a scalar fsub, and
// the difference is inserted back into lane 0.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}
1410
// Verifies that _mm_subs_epi8 is emitted as a call to the
// llvm.x86.sse2.psubs.b intrinsic on two <16 x i8> operands.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}
1416
// Verifies that _mm_subs_epi16 is emitted as a call to the
// llvm.x86.sse2.psubs.w intrinsic on two <8 x i16> operands.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}
1422
// Verifies that _mm_subs_epu8 is emitted as a call to the
// llvm.x86.sse2.psubus.b intrinsic on two <16 x i8> operands.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}
1428
// Verifies that _mm_subs_epu16 is emitted as a call to the
// llvm.x86.sse2.psubus.w intrinsic on two <8 x i16> operands.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}
1434
// Verifies that _mm_ucomieq_sd is emitted as a call to the
// llvm.x86.sse2.ucomieq.sd intrinsic: two <2 x double> in, i32 out.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}
1440
// Verifies that _mm_ucomige_sd is emitted as a call to the
// llvm.x86.sse2.ucomige.sd intrinsic: two <2 x double> in, i32 out.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1446
// Verifies that _mm_ucomigt_sd is emitted as a call to the
// llvm.x86.sse2.ucomigt.sd intrinsic: two <2 x double> in, i32 out.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1452
// Verifies that _mm_ucomile_sd is emitted as a call to the
// llvm.x86.sse2.ucomile.sd intrinsic: two <2 x double> in, i32 out.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1458
// Verifies that _mm_ucomilt_sd is emitted as a call to the
// llvm.x86.sse2.ucomilt.sd intrinsic: two <2 x double> in, i32 out.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1464
// Verifies that _mm_ucomineq_sd is emitted as a call to the
// llvm.x86.sse2.ucomineq.sd intrinsic: two <2 x double> in, i32 out.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1470
// Verifies that the value produced by _mm_undefined_pd is returned as a
// <2 x double> zeroinitializer. The function is declared with a (void)
// parameter list so that it has a proper C prototype.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> zeroinitializer
  return _mm_undefined_pd();
}
1476
// Exercises _mm_undefined_si128. The FileCheck directives in the body expect
// the function to return a zeroinitializer <2 x i64>.
// Declared with an explicit (void) parameter list so the definition is a
// proper prototype rather than an old-style unprototyped declarator.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> zeroinitializer
  return _mm_undefined_si128();
}
1482
// Exercises _mm_unpackhi_epi8. The FileCheck directives in the body expect a
// <16 x i8> shufflevector whose mask alternates indices 8..15 (first shuffle
// operand) with 24..31 (second shuffle operand).
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1488
// Exercises _mm_unpackhi_epi16. The FileCheck directives in the body expect an
// <8 x i16> shufflevector whose mask alternates indices 4..7 (first shuffle
// operand) with 12..15 (second shuffle operand).
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1494
// Exercises _mm_unpackhi_epi32. The FileCheck directives in the body expect a
// <4 x i32> shufflevector with mask <2, 6, 3, 7>, i.e. the upper two lanes of
// each shuffle operand interleaved.
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1500
// Exercises _mm_unpackhi_epi64. The FileCheck directives in the body expect a
// <2 x i64> shufflevector with mask <1, 3>, i.e. the upper lane of each
// shuffle operand.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1506
// Exercises _mm_unpackhi_pd. The FileCheck directives in the body expect a
// <2 x double> shufflevector with mask <1, 3>, i.e. the upper lane of each
// shuffle operand.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1512
// Exercises _mm_unpacklo_epi8. The FileCheck directives in the body expect a
// <16 x i8> shufflevector whose mask alternates indices 0..7 (first shuffle
// operand) with 16..23 (second shuffle operand).
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1518
// Exercises _mm_unpacklo_epi16. The FileCheck directives in the body expect an
// <8 x i16> shufflevector whose mask alternates indices 0..3 (first shuffle
// operand) with 8..11 (second shuffle operand).
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1524
// Exercises _mm_unpacklo_epi32. The FileCheck directives in the body expect a
// <4 x i32> shufflevector with mask <0, 4, 1, 5>, i.e. the lower two lanes of
// each shuffle operand interleaved.
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1530
// Exercises _mm_unpacklo_epi64. The FileCheck directives in the body expect a
// <2 x i64> shufflevector with mask <0, 2>, i.e. the lower lane of each
// shuffle operand.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1536
// Exercises _mm_unpacklo_pd. The FileCheck directives in the body expect a
// <2 x double> shufflevector with mask <0, 2>, i.e. the lower lane of each
// shuffle operand.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1542
// Exercises _mm_xor_pd. The FileCheck directives in the body expect the
// emitted IR to perform the xor on <2 x i64> values, i.e. on the integer view
// of the double vectors.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1548
// Exercises _mm_xor_si128. The FileCheck directives in the body expect the
// emitted IR to contain an xor of two <2 x i64> values.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1554