1; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
2
; Verify the AES-NI aesdec intrinsic selects the VEX-encoded instruction under AVX.
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesdec
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
9
10
; Verify the AES-NI aesdeclast intrinsic selects the VEX-encoded instruction under AVX.
define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesdeclast
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
17
18
; Verify the AES-NI aesenc intrinsic selects the VEX-encoded instruction under AVX.
define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesenc
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
25
26
; Verify the AES-NI aesenclast intrinsic selects the VEX-encoded instruction under AVX.
define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesenclast
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
33
34
; Verify the AES-NI aesimc (inverse mix columns) intrinsic selects the VEX-encoded instruction.
define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
  ; CHECK: vaesimc
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
41
42
; Verify the AES-NI aeskeygenassist intrinsic (constant round-constant immediate)
; selects the VEX-encoded instruction.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
  ; CHECK: vaeskeygenassist
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
49
50
; Verify the SSE2 scalar-double add intrinsic lowers to the AVX vaddsd instruction.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vaddsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
57
58
; Verify cmp.pd with immediate 7 (ordered compare) lowers to the mnemonic-expanded vcmpordpd.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
65
66
; Verify cmp.sd with immediate 7 (ordered compare) lowers to the mnemonic-expanded vcmpordsd.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
73
74
; Verify comieq.sd lowers to vcomisd plus sete/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
83
84
; Verify comige.sd lowers to vcomisd plus setae/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
93
94
; Verify comigt.sd lowers to vcomisd plus seta/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
103
104
; Verify comile.sd lowers to vcomisd plus setbe/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
113
114
; Verify comilt.sd lowers to vcomisd with the carry flag folded via sbbl/andl
; (rather than a setcc) to produce the i32 result.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: sbbl    %eax, %eax
  ; CHECK: andl    $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
123
124
; Verify comineq.sd lowers to vcomisd plus setne/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
133
134
; Verify the int32->double conversion intrinsic lowers to the AVX vcvtdq2pd instruction.
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
141
142
; Verify the int32->float conversion intrinsic lowers to the AVX vcvtdq2ps instruction.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
149
150
; Verify the double->int32 (round-to-current-mode) intrinsic lowers to vcvtpd2dq.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK: vcvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
157
158
; Verify the double->float conversion intrinsic lowers to the AVX vcvtpd2ps instruction.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK: vcvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
165
166
; Verify the float->int32 (round-to-current-mode) intrinsic lowers to vcvtps2dq.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK: vcvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
173
174
; Verify the float->double conversion intrinsic lowers to the AVX vcvtps2pd instruction.
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
181
182
; Verify the scalar double->i32 conversion intrinsic lowers to vcvtsd2si.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK: vcvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
189
190
; Verify the scalar double->float conversion intrinsic lowers to vcvtsd2ss.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK: vcvtsd2ss
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
197
198
; Verify the i32->scalar-double conversion: the constant 7 is materialized with movl
; and then converted with vcvtsi2sd.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK: movl
  ; CHECK: vcvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
206
207
; Verify the scalar float->double conversion intrinsic lowers to vcvtss2sd.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK: vcvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
214
215
; Verify the truncating double->int32 conversion intrinsic lowers to vcvttpd2dq.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK: vcvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
222
223
; Verify the truncating float->int32 conversion intrinsic lowers to vcvttps2dq.
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK: vcvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
230
231
; Verify the truncating scalar double->i32 conversion intrinsic lowers to vcvttsd2si.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK: vcvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
238
239
; Verify the SSE2 scalar-double divide intrinsic lowers to the AVX vdivsd instruction.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vdivsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
246
247
248
; Verify the packed-double max intrinsic lowers to the AVX vmaxpd instruction.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
255
256
; Verify the scalar-double max intrinsic lowers to the AVX vmaxsd instruction.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vmaxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
263
264
; Verify the packed-double min intrinsic lowers to the AVX vminpd instruction.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vminpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
271
272
; Verify the scalar-double min intrinsic lowers to the AVX vminsd instruction.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vminsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
279
280
; Verify the sign-bit extraction intrinsic lowers to the AVX vmovmskpd instruction.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
287
288
289
290
; Verify the scalar-double multiply intrinsic lowers to vmulsd (anchored to this
; function's label so the match cannot bleed in from a neighboring test).
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_mul_sd
  ; CHECK: vmulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
298
299
; Verify the signed-saturating dword->word pack intrinsic lowers to vpackssdw.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpackssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
306
307
; Verify the signed-saturating word->byte pack intrinsic lowers to vpacksswb.
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpacksswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
314
315
; Verify the unsigned-saturating word->byte pack intrinsic lowers to vpackuswb.
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpackuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
322
323
; Verify signed-saturating byte add lowers to the AVX vpaddsb instruction.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpaddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
330
331
; Verify signed-saturating word add lowers to the AVX vpaddsw instruction.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpaddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
338
339
; Verify unsigned-saturating byte add lowers to the AVX vpaddusb instruction.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpaddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
346
347
; Verify unsigned-saturating word add lowers to the AVX vpaddusw instruction.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpaddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
354
355
; Verify the byte rounding-average intrinsic lowers to the AVX vpavgb instruction.
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
362
363
; Verify the word rounding-average intrinsic lowers to the AVX vpavgw instruction.
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
370
371
; Verify the multiply-and-horizontal-add intrinsic lowers to the AVX vpmaddwd instruction.
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
378
379
; Verify the signed word max intrinsic lowers to the AVX vpmaxsw instruction.
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
386
387
; Verify the unsigned byte max intrinsic lowers to the AVX vpmaxub instruction.
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
394
395
; Verify the signed word min intrinsic lowers to the AVX vpminsw instruction.
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
402
403
; Verify the unsigned byte min intrinsic lowers to the AVX vpminub instruction.
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
410
411
; Verify the byte sign-bit mask intrinsic lowers to the AVX vpmovmskb instruction.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
418
419
; Verify the signed high-word multiply intrinsic lowers to the AVX vpmulhw instruction.
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
426
427
; Verify the unsigned high-word multiply intrinsic lowers to the AVX vpmulhuw instruction.
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
434
435
; Verify the unsigned 32x32->64 multiply intrinsic lowers to the AVX vpmuludq instruction.
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
442
443
; Verify the sum-of-absolute-differences intrinsic lowers to the AVX vpsadbw instruction.
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
450
451
; Verify the dword logical left shift (vector count) intrinsic lowers to vpslld.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
458
459
; Verify the whole-register left shift intrinsic lowers to vpslldq.
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
466
467
; Verify the byte-shift variant of the whole-register left shift also lowers to vpslldq.
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
474
475
; Verify the qword logical left shift (vector count) intrinsic lowers to vpsllq.
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
482
483
; Verify the word logical left shift (vector count) intrinsic lowers to vpsllw.
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
490
491
; Verify the dword logical left shift (immediate count) intrinsic lowers to vpslld.
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
498
499
; Verify the qword logical left shift (immediate count) intrinsic lowers to vpsllq.
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
506
507
; Verify the word logical left shift (immediate count) intrinsic lowers to vpsllw.
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
514
515
; Verify the dword arithmetic right shift (vector count) intrinsic lowers to vpsrad.
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
522
523
; Verify the word arithmetic right shift (vector count) intrinsic lowers to vpsraw.
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
530
531
; Verify the dword arithmetic right shift (immediate count) intrinsic lowers to vpsrad.
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
538
539
; Verify the word arithmetic right shift (immediate count) intrinsic lowers to vpsraw.
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
546
547
; Verify the dword logical right shift (vector count) intrinsic lowers to vpsrld.
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
554
555
; Verify the whole-register right shift intrinsic lowers to vpsrldq.
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
562
563
; Verify the byte-shift variant of the whole-register right shift also lowers to vpsrldq.
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
570
571
; Verify the qword logical right shift (vector count) intrinsic lowers to vpsrlq.
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
578
579
; Verify the word logical right shift (vector count) intrinsic lowers to vpsrlw.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
586
587
; Verify the dword logical right shift (immediate count) intrinsic lowers to vpsrld.
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
594
595
; Verify the qword logical right shift (immediate count) intrinsic lowers to vpsrlq.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
602
603
; Verify the word logical right shift (immediate count) intrinsic lowers to vpsrlw.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
610
611
; Verify signed-saturating byte subtract lowers to the AVX vpsubsb instruction.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
618
619
; Verify signed-saturating word subtract lowers to the AVX vpsubsw instruction.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
626
627
; Verify unsigned-saturating byte subtract lowers to the AVX vpsubusb instruction.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
634
635
; Verify unsigned-saturating word subtract lowers to the AVX vpsubusw instruction.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
642
643
; Verify the packed-double square root intrinsic lowers to the AVX vsqrtpd instruction.
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
650
651
; Verify the scalar-double square root intrinsic lowers to the AVX vsqrtsd instruction.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK: vsqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
658
659
; Verify the low-qword store intrinsic: the pointer argument is loaded from the
; stack with movl (32-bit codegen) and the store is emitted as vmovq.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_storel_dq
  ; CHECK: movl
  ; CHECK: vmovq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
668
669
; Verify the unaligned integer store intrinsic is emitted as vmovdqu.
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_storeu_dq
  ; CHECK: movl
  ; CHECK: vmovdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
680
681
; Verify the unaligned packed-double store intrinsic is emitted as vmovupd.
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_storeu_pd
  ; CHECK: movl
  ; CHECK: vmovupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
692
693
; Verify the scalar-double subtract intrinsic lowers to vsubsd (anchored to this
; function's label so the match cannot bleed in from a neighboring test).
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_sub_sd
  ; CHECK: vsubsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
701
702
; Verify ucomieq.sd lowers to vucomisd plus sete/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
711
712
; Verify ucomige.sd lowers to vucomisd plus setae/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
721
722
; Verify ucomigt.sd lowers to vucomisd plus seta/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
731
732
; Verify ucomile.sd lowers to vucomisd plus setbe/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
741
742
; Verify ucomilt.sd lowers to vucomisd with the carry flag folded via sbbl.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
750
751
; Verify ucomineq.sd lowers to vucomisd plus setne/movzbl to materialize the i32 result.
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
760
761
; Verify the SSE3 packed-double add/subtract intrinsic lowers to vaddsubpd.
define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vaddsubpd
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
768
769
; Verify the SSE3 packed-float add/subtract intrinsic lowers to vaddsubps.
define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddsubps
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
776
777
; Verify the SSE3 horizontal double add intrinsic lowers to vhaddpd.
define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vhaddpd
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
784
785
; Verify the SSE3 horizontal float add intrinsic lowers to vhaddps.
define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vhaddps
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
792
793
; Verify the SSE3 horizontal double subtract intrinsic lowers to vhsubpd.
define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vhsubpd
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
800
801
; Verify the SSE3 horizontal float subtract intrinsic lowers to vhsubps.
define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vhsubps
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
808
809
; Verify the SSE3 unaligned load intrinsic: the pointer argument is loaded with
; movl (32-bit codegen) and the vector load is emitted as vlddqu.
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vlddqu
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
817
818
; Verify the SSE4.1 immediate packed-double blend intrinsic lowers to vblendpd.
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
825
826
827define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
828  ; CHECK: vblendps
829  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
830  ret <4 x float> %res
831}
832declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
833
834
835define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
836  ; CHECK: vblendvpd
837  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
838  ret <2 x double> %res
839}
840declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
841
842
843define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
844  ; CHECK: vblendvps
845  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
846  ret <4 x float> %res
847}
848declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
849
850
851define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
852  ; CHECK: vdppd
853  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
854  ret <2 x double> %res
855}
856declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
857
858
859define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
860  ; CHECK: vdpps
861  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
862  ret <4 x float> %res
863}
864declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
865
866
867define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
868  ; CHECK: vinsertps
869  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
870  ret <4 x float> %res
871}
872declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
873
874
875
876define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
877  ; CHECK: vmpsadbw
878  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
879  ret <8 x i16> %res
880}
881declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
882
883
884define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
885  ; CHECK: vpackusdw
886  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
887  ret <8 x i16> %res
888}
889declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
890
891
892define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
893  ; CHECK: vpblendvb
894  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
895  ret <16 x i8> %res
896}
897declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
898
899
900define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
901  ; CHECK: vpblendw
902  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
903  ret <8 x i16> %res
904}
905declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
906
907
908define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
909  ; CHECK: vphminposuw
910  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
911  ret <8 x i16> %res
912}
913declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
914
915
916define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
917  ; CHECK: vpmaxsb
918  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
919  ret <16 x i8> %res
920}
921declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
922
923
924define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
925  ; CHECK: vpmaxsd
926  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
927  ret <4 x i32> %res
928}
929declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
930
931
932define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
933  ; CHECK: vpmaxud
934  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
935  ret <4 x i32> %res
936}
937declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
938
939
940define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
941  ; CHECK: vpmaxuw
942  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
943  ret <8 x i16> %res
944}
945declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
946
947
948define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
949  ; CHECK: vpminsb
950  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
951  ret <16 x i8> %res
952}
953declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
954
955
956define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
957  ; CHECK: vpminsd
958  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
959  ret <4 x i32> %res
960}
961declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
962
963
964define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
965  ; CHECK: vpminud
966  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
967  ret <4 x i32> %res
968}
969declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
970
971
972define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
973  ; CHECK: vpminuw
974  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
975  ret <8 x i16> %res
976}
977declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
978
979
980define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
981  ; CHECK: vpmovsxbd
982  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
983  ret <4 x i32> %res
984}
985declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
986
987
988define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
989  ; CHECK: vpmovsxbq
990  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
991  ret <2 x i64> %res
992}
993declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
994
995
996define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
997  ; CHECK: vpmovsxbw
998  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
999  ret <8 x i16> %res
1000}
1001declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
1002
1003
1004define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
1005  ; CHECK: vpmovsxdq
1006  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1007  ret <2 x i64> %res
1008}
1009declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
1010
1011
1012define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
1013  ; CHECK: vpmovsxwd
1014  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1015  ret <4 x i32> %res
1016}
1017declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
1018
1019
1020define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
1021  ; CHECK: vpmovsxwq
1022  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1023  ret <2 x i64> %res
1024}
1025declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
1026
1027
1028define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
1029  ; CHECK: vpmovzxbd
1030  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
1031  ret <4 x i32> %res
1032}
1033declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
1034
1035
1036define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
1037  ; CHECK: vpmovzxbq
1038  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
1039  ret <2 x i64> %res
1040}
1041declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
1042
1043
1044define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
1045  ; CHECK: vpmovzxbw
1046  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
1047  ret <8 x i16> %res
1048}
1049declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
1050
1051
1052define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
1053  ; CHECK: vpmovzxdq
1054  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1055  ret <2 x i64> %res
1056}
1057declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
1058
1059
1060define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
1061  ; CHECK: vpmovzxwd
1062  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1063  ret <4 x i32> %res
1064}
1065declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
1066
1067
1068define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
1069  ; CHECK: vpmovzxwq
1070  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1071  ret <2 x i64> %res
1072}
1073declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
1074
1075
1076define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
1077  ; CHECK: vpmuldq
1078  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
1079  ret <2 x i64> %res
1080}
1081declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
1082
1083
1084define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
1085  ; CHECK: vptest
1086  ; CHECK: sbbl
1087  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1088  ret i32 %res
1089}
1090declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
1091
1092
1093define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
1094  ; CHECK: vptest
1095  ; CHECK: seta
1096  ; CHECK: movzbl
1097  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1098  ret i32 %res
1099}
1100declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
1101
1102
1103define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
1104  ; CHECK: vptest
1105  ; CHECK: sete
1106  ; CHECK: movzbl
1107  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1108  ret i32 %res
1109}
1110declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
1111
1112
1113define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
1114  ; CHECK: vroundpd
1115  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
1116  ret <2 x double> %res
1117}
1118declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1119
1120
1121define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
1122  ; CHECK: vroundps
1123  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1124  ret <4 x float> %res
1125}
1126declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1127
1128
1129define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
1130  ; CHECK: vroundsd
1131  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
1132  ret <2 x double> %res
1133}
1134declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1135
1136
1137define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
1138  ; CHECK: vroundss
1139  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
1140  ret <4 x float> %res
1141}
1142declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1143
1144
; ---------------------------------------------------------------------------
; SSE4.2 string-compare intrinsics.
; pcmpestr* take explicit lengths through the implicit EAX/EDX registers,
; so their tests first match the two "movl $7" instructions that load the
; length arguments.  The *i variants return an index (matched via movl from
; implicit ECX); the *m variants return a mask in xmm0, so the tests assert
; with a negative vmov match that no extra register copy is emitted.  The
; flag variants (a/c/o/s/z) match the setcc/sbb that reads EFLAGS.
; ---------------------------------------------------------------------------
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl $7
  ; CHECK: movl $7
  ; CHECK: vpcmpestri $7
  ; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; Memory-operand form: "vpcmpestri $7, (" checks the second operand is
; folded as a memory reference rather than loaded into a register first.
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
  ; CHECK: movl $7
  ; CHECK: movl $7
  ; CHECK: vpcmpestri $7, (
  ; CHECK: movl
  %1 = load <16 x i8>* %a0
  %2 = load <16 x i8>* %a2
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}


define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: seta
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: seto
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: sets
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: sete
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestrm
  ; CHECK-NOT: vmov
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
  ; CHECK: movl $7
  ; CHECK: movl $7
  ; CHECK: vpcmpestrm $7,
  ; CHECK-NOT: vmov
  %1 = load <16 x i8>* %a2
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}


; pcmpistr* use implicit string lengths, so no movl $7 setup is expected.
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri $7
  ; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
  ; CHECK: vpcmpistri $7, (
  ; CHECK: movl
  %1 = load <16 x i8>* %a0
  %2 = load <16 x i8>* %a1
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}


define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: seta
  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: seto
  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: sets
  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: sete
  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistrm $7
  ; CHECK-NOT: vmov
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
  ; CHECK: vpcmpistrm $7, (
  ; CHECK-NOT: vmov
  %1 = load <16 x i8>* %a1
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
1325
1326
; ---------------------------------------------------------------------------
; SSE1 intrinsics (scalar and packed single precision).
; The comis/ucomis tests match the setcc or sbb instruction that turns the
; EFLAGS comparison result into the returned i32.  cmp.ps/cmp.ss use
; predicate 7 (ordered), hence the vcmpord* mnemonics.
; ---------------------------------------------------------------------------
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddss
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordps
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordss
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sbb
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
  ; CHECK: movl
  ; CHECK: vcvtsi2ss
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
  ; CHECK: vcvtss2si
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
  ; CHECK: vcvttss2si
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vdivss
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


define void @test_x86_sse_ldmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vldmxcsr
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind



define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmaxss
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vminps
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vminss
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
  ; CHECK: vmovmskps
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone



define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmulss
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
  ; CHECK: vrcpps
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
  ; CHECK: vrcpss
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
  ; CHECK: vrsqrtps
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
  ; CHECK: vrsqrtss
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
  ; CHECK: vsqrtss
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


define void @test_x86_sse_stmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vstmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind


define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: movl
  ; CHECK: vmovups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsubss
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
1633
1634
; ---------------------------------------------------------------------------
; SSSE3 intrinsics (128-bit forms): absolute value and horizontal add/sub.
; Same pattern: verify the VEX-encoded "v"-prefixed instruction is selected.
; ---------------------------------------------------------------------------
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
  ; CHECK: vpabsb
  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
  ; CHECK: vpabsd
  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
  ; CHECK: vpabsw
  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphaddd
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphaddsw
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphaddw
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphsubd
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubsw
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

1699define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
1700  ; CHECK: vphsubw
1701  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1702  ret <8 x i16> %res
1703}
1704declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
1705
1706
1707define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
1708  ; CHECK: vpmaddubsw
1709  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
1710  ret <8 x i16> %res
1711}
1712declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
1713
1714
1715define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
1716  ; CHECK: vpmulhrsw
1717  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1718  ret <8 x i16> %res
1719}
1720declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
1721
1722
1723define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
1724  ; CHECK: vpshufb
1725  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1726  ret <16 x i8> %res
1727}
1728declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
1729
1730
1731define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
1732  ; CHECK: vpsignb
1733  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1734  ret <16 x i8> %res
1735}
1736declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
1737
1738
1739define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
1740  ; CHECK: vpsignd
1741  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1742  ret <4 x i32> %res
1743}
1744declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
1745
1746
1747define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
1748  ; CHECK: vpsignw
1749  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1750  ret <8 x i16> %res
1751}
1752declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
1753
1754
; 256-bit AVX arithmetic, blend, and compare intrinsics.
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vaddsubpd
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vaddsubps
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vblendpd
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vblendps
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vblendvpd
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vblendvps
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


; Predicate 7 (ordered) prints as the vcmpordpd pseudo-op mnemonic.
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vcmpordpd
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


; NOTE: the declare for @llvm.x86.avx.cmp.ps.256 is shared with the
; pseudo_op test below and appears after it.
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vcmpordps
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}

; Cycles through all 32 AVX compare predicates (imm 0-31), chaining each
; result into the next call so none is dead, and checks that the assembler
; printer emits the distinct pseudo-op mnemonic for every predicate.
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vcmpeqps
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpltps
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpleps
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpunordps
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneqps
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnltps
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnleps
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpordps
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpeq_uqps
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpngeps
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpngtps
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpfalseps
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneq_oqps
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpgeps
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpgtps
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmptrueps
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpeq_osps
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmplt_oqps
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmple_oqps
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpunord_sps
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneq_usps
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnlt_uqps
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnle_uqps
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpord_sps
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpeq_usps
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpnge_uqps
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpngt_uqps
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpfalse_osps
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpneq_osps
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpge_oqps
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmpgt_oqps
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
  ; CHECK: vcmptrue_usps
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
1885
1886
; 256-bit AVX conversion intrinsics. For the narrowing pd->ps / pd->dq forms
; the expected mnemonic carries the AT&T 'y' suffix (e.g. vcvtpd2psy), which
; disambiguates the 256-bit source operand width.
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2psy
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2dqy
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvtps2dq
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone


define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvttpd2dqy
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvttps2dq
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
1949
1950
; Dot product, horizontal add/sub, unaligned integer load, and masked
; load/store intrinsics. Note the masked-load declares are readonly and the
; masked-store declares have no memory attribute (they write memory).
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vdpps
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhaddpd
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhaddps
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhsubpd
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhsubps
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
  ; CHECK: vlddqu
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: vmaskmovpd
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly


define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmaskmovpd
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly


define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: vmaskmovps
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly


define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmaskmovps
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly


define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind


define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind


define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind


define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
2061
2062
; 256-bit max/min, sign-mask extraction, and ptest condition-code intrinsics.
define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vminpd
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vminps
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone


define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
  ; CHECK: vmovmskps
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone






; The ptest* tests also check how each condition flag is turned into an i32
; (sbbl for carry, seta/sete + movzbl for the zero-flag based results).
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
2143
2144
; 256-bit reciprocal-estimate, rounding, and square-root intrinsics.
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
  ; CHECK: vrcpps
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
  ; CHECK: vroundpd
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
  ; CHECK: vroundps
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vrsqrtps
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
2191
2192
; Unaligned 256-bit stores and scalar/128-bit broadcast loads. The add/fadd
; before the dq/pd stores pins the value to a specific execution domain so
; the expected mov variant is selected.
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
  ; CHECK: vmovups
  ; add operation forces the execution domain.
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind


define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmovupd
  ; add operation forces the execution domain.
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind


define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmovups
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind


define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
  ; CHECK: vbroadcastsd
  %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly


define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
2260
2261
; 128-bit lane extract/insert and cross-lane 2x128 permute intrinsics.
; The i8 7 immediate is an arbitrary selector; only the mnemonic is checked.
define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
  ; CHECK: vextractf128
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
  ; CHECK: vextractf128
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone


define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
  ; CHECK: vextractf128
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
  ; CHECK: vinsertf128
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
  ; CHECK: vinsertf128
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vinsertf128
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vperm2f128
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vperm2f128
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vperm2f128
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
2332
2333
2334define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
2335  ; CHECK: vpermilpd
2336  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
2337  ret <2 x double> %res
2338}
2339declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
2340
2341
2342define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
2343  ; CHECK: vpermilpd
2344  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
2345  ret <4 x double> %res
2346}
2347declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
2348
2349
2350define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
2351  ; CHECK: vpshufd
2352  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
2353  ret <4 x float> %res
2354}
2355declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
2356
2357
; Check the 256-bit immediate-form vpermilps intrinsic lowers to vpermilps.
define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
  ; CHECK: vpermilps
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
2364
2365
; Check the 128-bit variable-control vpermilpd intrinsic lowers to vpermilpd.
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
  ; CHECK: vpermilpd
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
2372
2373
; Check the 256-bit variable-control vpermilpd intrinsic lowers to vpermilpd.
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
  ; CHECK: vpermilpd
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
2380
2381
; Check the 128-bit variable-control vpermilps intrinsic lowers to vpermilps
; (register-operand form; the load form is tested separately below).
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
  ; CHECK: vpermilps
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
; Same intrinsic with the permute-control vector loaded from memory; still
; expected to select vpermilps (presumably folding the load — TODO: the CHECK
; only pins the mnemonic, not a memory operand).
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
  ; CHECK: vpermilps
  %a2 = load <4 x i32>* %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
2394
2395
; Check the 256-bit variable-control vpermilps intrinsic lowers to vpermilps.
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
  ; CHECK: vpermilps
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
2402
2403
; vtestc (CF result): expect vtestpd followed by sbbl, which materializes the
; carry flag into the i32 return.
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
2411
2412
; 256-bit variant of the vtestc.pd test: vtestpd + sbbl to capture CF.
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
2420
2421
; vtestc on floats: vtestps + sbbl to capture CF.
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
2429
2430
; 256-bit variant of the vtestc.ps test: vtestps + sbbl to capture CF.
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
2438
2439
; vtestnzc (ZF=0 and CF=0): expect vtestpd then seta/movzbl to produce the i32.
define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
2448
2449
; 256-bit variant of the vtestnzc.pd test: vtestpd + seta/movzbl.
define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
2458
2459
; vtestnzc on floats: vtestps + seta/movzbl.
define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
2468
2469
; 256-bit variant of the vtestnzc.ps test: vtestps + seta/movzbl.
define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
2478
2479
; vtestz (ZF result): expect vtestpd then sete/movzbl to produce the i32.
define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
2488
2489
; 256-bit variant of the vtestz.pd test: vtestpd + sete/movzbl.
define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
2498
2499
; vtestz on floats: vtestps + sete/movzbl.
define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
2508
2509
; 256-bit variant of the vtestz.ps test: vtestps + sete/movzbl.
define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
2518
2519
; Check the vzeroall intrinsic selects the vzeroall instruction.
define void @test_x86_avx_vzeroall() {
  ; CHECK: vzeroall
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind
2526
2527
; Check the vzeroupper intrinsic selects the vzeroupper instruction.
define void @test_x86_avx_vzeroupper() {
  ; CHECK: vzeroupper
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind
2534
2535; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
2536
; SSE3 monitor has no AVX/VEX form; verify it still selects under -mcpu=corei7-avx.
; CHECK: monitor
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
entry:
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
2544
; SSE3 mwait has no AVX/VEX form; verify it still selects under AVX.
; CHECK: mwait
define void @mwait(i32 %E, i32 %H) nounwind {
entry:
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
2552
; SSE sfence has no AVX/VEX form; verify it still selects under AVX.
; CHECK: sfence
define void @sfence() nounwind {
entry:
  tail call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind
2560
; SSE2 lfence has no AVX/VEX form; verify it still selects under AVX.
; CHECK: lfence
define void @lfence() nounwind {
entry:
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind
2568
; SSE2 mfence has no AVX/VEX form; verify it still selects under AVX.
; CHECK: mfence
define void @mfence() nounwind {
entry:
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind
2576
; SSE2 clflush has no AVX/VEX form; verify it still selects under AVX.
; CHECK: clflush
define void @clflush(i8* %p) nounwind {
entry:
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind
2584
; SSE4.2 crc32 (8-bit operand) still selects under AVX as crc32b.
; CHECK: crc32b
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
2591
; SSE4.2 crc32 (16-bit operand) still selects under AVX as crc32w.
; CHECK: crc32w
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
2598
; SSE4.2 crc32 (32-bit operand) still selects under AVX as crc32l.
; CHECK: crc32l
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
2605
; Check the 256-bit non-temporal integer store intrinsic emits movntdq.
; CHECK: movntdq
define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
  ; integer add on the value — presumably forces the integer execution domain,
  ; mirroring the fadd in @movnt_pd below (TODO: confirm intent).
  %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
2613
; Check the 256-bit non-temporal float store intrinsic emits movntps.
; CHECK: movntps
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
2620
; Check the 256-bit non-temporal double store intrinsic emits movntpd.
; CHECK: movntpd
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; add operation forces the execution domain.
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
2629
2630
; Check for pclmulqdq: the carry-less multiply intrinsic selects the
; VEX-encoded vpclmulqdq under AVX.
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpclmulqdq
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
2638