; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s
2
; Codegen test for @llvm.x86.sse.add.ss: the llc output for this function
; must contain `addss`. The label directive pins the check to this function's
; own output so it cannot be satisfied by a neighbouring test.
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_add_ss:
  ; CHECK: addss
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
9
10
; Codegen test for @llvm.x86.sse.cmp.ps called with immediate 7: the llc
; output for this function must contain `cmpordps`. The label directive pins
; the check to this function's own output.
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_cmp_ps:
  ; CHECK: cmpordps
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
17
18
; Codegen test for @llvm.x86.sse.cmp.ss called with immediate 7: the llc
; output for this function must contain `cmpordss`. The label directive pins
; the check to this function's own output.
define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_cmp_ss:
  ; CHECK: cmpordss
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
25
26
; Codegen test for @llvm.x86.sse.comieq.ss: expects `comiss`, then `sete`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function; without it `comiss` could also match a later `ucomiss`.
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comieq_ss:
  ; CHECK: comiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
35
36
; Codegen test for @llvm.x86.sse.comige.ss: expects `comiss`, then `setae`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function; without it `comiss` could also match a later `ucomiss`.
define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comige_ss:
  ; CHECK: comiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
45
46
; Codegen test for @llvm.x86.sse.comigt.ss: expects `comiss`, then `seta`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function; without it `seta` could also match a `setae` emitted for
; another test further down the output.
define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comigt_ss:
  ; CHECK: comiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
55
56
; Codegen test for @llvm.x86.sse.comile.ss: expects `comiss`, then `setbe`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function; without it `comiss` could also match a later `ucomiss`.
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comile_ss:
  ; CHECK: comiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
65
66
; Codegen test for @llvm.x86.sse.comilt.ss: expects `comiss` followed by an
; `sbb`. The label directive confines the checks to this function; without
; it both patterns could match the `ucomiss`/`sbbl` of a later test.
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comilt_ss:
  ; CHECK: comiss
  ; CHECK: sbb
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
74
75
; Codegen test for @llvm.x86.sse.comineq.ss: expects `comiss`, then `setne`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function; without it `comiss` could also match a later `ucomiss`.
define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comineq_ss:
  ; CHECK: comiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
84
85
; Codegen test for @llvm.x86.sse.cvtsi2ss with the constant i32 7: expects a
; `movl` followed by `cvtsi2ss`. The label directive confines the checks to
; this function, since `movl` is generic enough to match almost anywhere.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_cvtsi2ss:
  ; CHECK: movl
  ; CHECK: cvtsi2ss
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
93
94
; Codegen test for @llvm.x86.sse.cvtss2si: the llc output for this function
; must contain `cvtss2si`. The label directive pins the check to this
; function's own output.
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_cvtss2si:
  ; CHECK: cvtss2si
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
101
102
; Codegen test for @llvm.x86.sse.cvttss2si: the llc output for this function
; must contain `cvttss2si`. The label directive pins the check to this
; function's own output.
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_cvttss2si:
  ; CHECK: cvttss2si
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
109
110
; Codegen test for @llvm.x86.sse.div.ss: the llc output for this function
; must contain `divss`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_div_ss:
  ; CHECK: divss
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
117
118
; Codegen test for @llvm.x86.sse.ldmxcsr taking an i8* argument: expects a
; `movl` followed by `ldmxcsr`. The label directive confines the checks to
; this function, since `movl` is generic enough to match almost anywhere.
define void @test_x86_sse_ldmxcsr(i8* %a0) {
  ; CHECK-LABEL: test_x86_sse_ldmxcsr:
  ; CHECK: movl
  ; CHECK: ldmxcsr
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
126
127
128
; Codegen test for @llvm.x86.sse.max.ps: the llc output for this function
; must contain `maxps`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_max_ps:
  ; CHECK: maxps
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
135
136
; Codegen test for @llvm.x86.sse.max.ss: the llc output for this function
; must contain `maxss`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_max_ss:
  ; CHECK: maxss
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
143
144
; Codegen test for @llvm.x86.sse.min.ps: the llc output for this function
; must contain `minps`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_min_ps:
  ; CHECK: minps
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
151
152
; Codegen test for @llvm.x86.sse.min.ss: the llc output for this function
; must contain `minss`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_min_ss:
  ; CHECK: minss
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
159
160
; Codegen test for @llvm.x86.sse.movmsk.ps: the llc output for this function
; must contain `movmskps`. The label directive pins the check to this
; function's own output.
define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_movmsk_ps:
  ; CHECK: movmskps
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
167
168
169
; Codegen test for @llvm.x86.sse.mul.ss: the llc output for this function
; must contain `mulss`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_mul_ss:
  ; CHECK: mulss
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
176
177
; Codegen test for @llvm.x86.sse.rcp.ps: the llc output for this function
; must contain `rcpps`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rcp_ps:
  ; CHECK: rcpps
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
184
185
; Codegen test for @llvm.x86.sse.rcp.ss: the llc output for this function
; must contain `rcpss`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rcp_ss:
  ; CHECK: rcpss
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
192
193
; Codegen test for @llvm.x86.sse.rsqrt.ps: the llc output for this function
; must contain `rsqrtps`. The label directive pins the check to this
; function's own output.
define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rsqrt_ps:
  ; CHECK: rsqrtps
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
200
201
; Codegen test for @llvm.x86.sse.rsqrt.ss: the llc output for this function
; must contain `rsqrtss`. The label directive pins the check to this
; function's own output.
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rsqrt_ss:
  ; CHECK: rsqrtss
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
208
209
; Codegen test for @llvm.x86.sse.sqrt.ps: the llc output for this function
; must contain `sqrtps`. The label directive pins the check to this
; function's own output (the pattern is also a substring of `rsqrtps`).
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_sqrt_ps:
  ; CHECK: sqrtps
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
216
217
; Codegen test for @llvm.x86.sse.sqrt.ss: the llc output for this function
; must contain `sqrtss`. The label directive pins the check to this
; function's own output (the pattern is also a substring of `rsqrtss`).
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_sqrt_ss:
  ; CHECK: sqrtss
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
224
225
; Codegen test for @llvm.x86.sse.stmxcsr taking an i8* argument: expects a
; `movl` followed by `stmxcsr`. The label directive confines the checks to
; this function, since `movl` is generic enough to match almost anywhere.
define void @test_x86_sse_stmxcsr(i8* %a0) {
  ; CHECK-LABEL: test_x86_sse_stmxcsr:
  ; CHECK: movl
  ; CHECK: stmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
233
234
; Codegen test for @llvm.x86.sse.storeu.ps (i8* destination, <4 x float>
; value): expects a `movl` followed by `movups`. The label directive confines
; the checks to this function, since `movl` is generic enough to match almost
; anywhere.
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_storeu_ps:
  ; CHECK: movl
  ; CHECK: movups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
242
243
; Codegen test for @llvm.x86.sse.sub.ss: the llc output for this function
; must contain `subss`. The label directive pins the check to this function's
; own output.
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_sub_ss:
  ; CHECK: subss
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
250
251
; Codegen test for @llvm.x86.sse.ucomieq.ss: expects `ucomiss`, then `sete`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function so they cannot be satisfied by a later ucomi* test.
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
  ; CHECK: ucomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
260
261
; Codegen test for @llvm.x86.sse.ucomige.ss: expects `ucomiss`, then `setae`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function so they cannot be satisfied by a later ucomi* test.
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomige_ss:
  ; CHECK: ucomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
270
271
; Codegen test for @llvm.x86.sse.ucomigt.ss: expects `ucomiss`, then `seta`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function so they cannot be satisfied by a later ucomi* test.
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomigt_ss:
  ; CHECK: ucomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
280
281
; Codegen test for @llvm.x86.sse.ucomile.ss: expects `ucomiss`, then `setbe`,
; then `movzbl`, in that order. The label directive confines the checks to
; this function so they cannot be satisfied by a later ucomi* test.
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomile_ss:
  ; CHECK: ucomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
290
291
; Codegen test for @llvm.x86.sse.ucomilt.ss: expects `ucomiss` followed by
; `sbbl`. The label directive confines the checks to this function; without
; it the `ucomiss` pattern could match the one emitted for the next test.
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
  ; CHECK: ucomiss
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
299
300
; Codegen test for @llvm.x86.sse.ucomineq.ss: expects `ucomiss`, then
; `setne`, then `movzbl`, in that order. The label directive makes the checks
; start at this function's own label rather than wherever the previous match
; happened to end.
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
  ; CHECK: ucomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
309