// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s


#include <immintrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ps
  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

// NOTE: a >= b is emitted as b <= a with swapped operands, hence fcmp ole.
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

// NOTE: a > b is emitted as b < a with swapped operands, hence fcmp olt.
__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ps
  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

// NOTE: !(a >= b) is emitted with swapped operands as b u> a, hence fcmp ugt.
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ps
  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ps
  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}

#ifdef __x86_64__
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}
#endif

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

#ifdef __x86_64__
long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}
#endif

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvttss_si32(A);
}

#ifdef __x86_64__
long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
  return _mm_cvttss_si64(A);
}
#endif

__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}

unsigned int test_MM_GET_EXCEPTION_MASK() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE() {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE() {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr() {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: load i32
  return _mm_getcsr();
}

__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, <4 x float>* {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_move_ss(A, B);
}

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}

void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch.p0i8(i8* {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  return _mm_rsqrt_ss(x);
}

void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -64
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}

__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _mm_setcsr(A);
}

__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}

__m128 test_mm_setzero_ps() {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}

void test_mm_sfence() {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}

__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_mm_sqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ss
  // CHECK: extractelement <4 x float> {{.*}}, i64 0
  // CHECK: call float @llvm.sqrt.f32(float {{.*}})
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0
  return _mm_sqrt_ss(x);
}

void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, float* {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3>
  // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float*A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}

int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}

// NOTE: clang lowers _mm_undefined_ps to zeroinitializer; the value is still
// formally unspecified, so callers must not rely on its contents.
__m128 test_mm_undefined_ps() {
  // CHECK-LABEL: test_mm_undefined_ps
  // CHECK: ret <4 x float> zeroinitializer
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}
