1; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
2
; Calls @llvm.x86.xop.vpermil2pd on three by-value <2 x double> arguments with
; immediate i8 1; the checks require a "vpermil2pd" match after this
; function's label in llc's output.
define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
  ; CHECK: vpermil2pd
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1)
  ret <2 x double> %res
}
; Calls @llvm.x86.xop.vpermil2pd with its second operand loaded through
; pointer %a1. CHECK-NOT rejects any "vmovaps" between this function's label
; and the "vpermil2pd" match (presumably to confirm the load is folded into
; the instruction -- confirm against the backend patterns).
define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpermil2pd
  %vec = load <2 x double>* %a1
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1)
  ret <2 x double> %res
}
; Calls @llvm.x86.xop.vpermil2pd with its third operand loaded through
; pointer %a2. CHECK-NOT rejects any "vmovaps" between this function's label
; and the "vpermil2pd" match (presumably to confirm the load is folded into
; the instruction -- confirm against the backend patterns).
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpermil2pd
  %vec = load <2 x double>* %a2
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
23
; Calls @llvm.x86.xop.vpermil2pd.256 on three by-value <4 x double> arguments
; with immediate i8 2; the checks require "vpermil2pd" and then "ymm" after
; this function's label in llc's output.
define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
  ; CHECK: vpermil2pd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2)
  ret <4 x double> %res
}
; Calls @llvm.x86.xop.vpermil2pd.256 with its second operand loaded through
; pointer %a1. CHECK-NOT rejects any "vmovaps" between this function's label
; and the "vpermil2pd" match (presumably a load-folding check -- confirm);
; "ymm" must then follow.
define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpermil2pd
  ; CHECK: ymm
  %vec = load <4 x double>* %a1
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2)
  ret <4 x double> %res
}
; Calls @llvm.x86.xop.vpermil2pd.256 with its third operand loaded through
; pointer %a2. CHECK-NOT rejects any "vmovaps" between this function's label
; and the "vpermil2pd" match (presumably a load-folding check -- confirm);
; "ymm" must then follow.
define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpermil2pd
  ; CHECK: ymm
  %vec = load <4 x double>* %a2
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
47
; Calls @llvm.x86.xop.vpermil2ps on three by-value <4 x float> arguments with
; immediate i8 3; the checks require a "vpermil2ps" match after this
; function's label in llc's output.
define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
  ; CHECK: vpermil2ps
  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
54
; Calls @llvm.x86.xop.vpermil2ps.256 on three by-value <8 x float> arguments
; with immediate i8 4; the checks require "vpermil2ps" and then "ymm" after
; this function's label in llc's output.
define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
  ; CHECK: vpermil2ps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
62
; Calls @llvm.x86.xop.vpcmov on three by-value <2 x i64> arguments; the checks
; require a "vpcmov" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpcmov:
  ; CHECK: vpcmov
  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
69
; Calls @llvm.x86.xop.vpcmov.256 on three by-value <4 x i64> arguments; the
; checks require "vpcmov" and then "ymm" after this function's label in llc's
; output.
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
  ; CHECK: vpcmov
  ; CHECK: ymm
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret <4 x i64> %res
}
; Calls @llvm.x86.xop.vpcmov.256 with its second operand loaded through
; pointer %a1. CHECK-NOT rejects any "vmovaps" between this function's label
; and the "vpcmov" match (presumably a load-folding check -- confirm); "ymm"
; must then follow.
define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpcmov
  ; CHECK: ymm
  %vec = load <4 x i64>* %a1
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2)
  ret <4 x i64> %res
}
; Calls @llvm.x86.xop.vpcmov.256 with its third operand loaded through
; pointer %a2. CHECK-NOT rejects any "vmovaps" between this function's label
; and the "vpcmov" match (presumably a load-folding check -- confirm); "ymm"
; must then follow.
define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpcmov
  ; CHECK: ymm
  %vec = load <4 x i64>* %a2
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
93
; Calls @llvm.x86.xop.vpcomeqb on two by-value <16 x i8> arguments; the checks
; require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomeqb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
; Calls @llvm.x86.xop.vpcomeqb with its second operand loaded through pointer
; %a1. CHECK-NOT rejects any "vmovaps" between this function's label and the
; "vpcomb" match (presumably a load-folding check -- confirm).
define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomeqb_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpcomb
  %vec = load <16 x i8>* %a1
  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
107
; Calls @llvm.x86.xop.vpcomeqw on two by-value <8 x i16> arguments; the checks
; require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomeqw:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
114
; Calls @llvm.x86.xop.vpcomeqd on two by-value <4 x i32> arguments; the checks
; require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomeqd:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
121
; Calls @llvm.x86.xop.vpcomeqq on two by-value <2 x i64> arguments; the checks
; require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomeqq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
128
; Calls @llvm.x86.xop.vpcomequb on two by-value <16 x i8> arguments; the checks
; require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomequb:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
135
; Calls @llvm.x86.xop.vpcomequd on two by-value <4 x i32> arguments; the checks
; require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomequd:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
142
; Calls @llvm.x86.xop.vpcomequq on two by-value <2 x i64> arguments; the checks
; require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomequq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
149
; Calls @llvm.x86.xop.vpcomequw on two by-value <8 x i16> arguments; the checks
; require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomequw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
156
; Calls @llvm.x86.xop.vpcomfalseb on two by-value <16 x i8> arguments; the
; checks require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalseb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
163
; Calls @llvm.x86.xop.vpcomfalsed on two by-value <4 x i32> arguments; the
; checks require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalsed:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
170
; Calls @llvm.x86.xop.vpcomfalseq on two by-value <2 x i64> arguments; the
; checks require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalseq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
177
; Calls @llvm.x86.xop.vpcomfalseub on two by-value <16 x i8> arguments; the
; checks require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalseub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
184
; Calls @llvm.x86.xop.vpcomfalseud on two by-value <4 x i32> arguments; the
; checks require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalseud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
191
; Calls @llvm.x86.xop.vpcomfalseuq on two by-value <2 x i64> arguments; the
; checks require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
198
; Calls @llvm.x86.xop.vpcomfalseuw on two by-value <8 x i16> arguments; the
; checks require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
205
; Calls @llvm.x86.xop.vpcomfalsew on two by-value <8 x i16> arguments; the
; checks require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomfalsew:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
212
; Calls @llvm.x86.xop.vpcomgeb on two by-value <16 x i8> arguments; the checks
; require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgeb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
219
; Calls @llvm.x86.xop.vpcomged on two by-value <4 x i32> arguments; the checks
; require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomged:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
226
; Calls @llvm.x86.xop.vpcomgeq on two by-value <2 x i64> arguments; the checks
; require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgeq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
233
; Calls @llvm.x86.xop.vpcomgeub on two by-value <16 x i8> arguments; the checks
; require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgeub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
240
; Calls @llvm.x86.xop.vpcomgeud on two by-value <4 x i32> arguments; the checks
; require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgeud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
247
; Calls @llvm.x86.xop.vpcomgeuq on two by-value <2 x i64> arguments; the checks
; require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgeuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
254
; Calls @llvm.x86.xop.vpcomgeuw on two by-value <8 x i16> arguments; the checks
; require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgeuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
261
; Calls @llvm.x86.xop.vpcomgew on two by-value <8 x i16> arguments; the checks
; require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgew:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
268
; Calls @llvm.x86.xop.vpcomgtb on two by-value <16 x i8> arguments; the checks
; require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
275
; Calls @llvm.x86.xop.vpcomgtd on two by-value <4 x i32> arguments; the checks
; require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtd:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
282
; Calls @llvm.x86.xop.vpcomgtq on two by-value <2 x i64> arguments; the checks
; require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
289
; Calls @llvm.x86.xop.vpcomgtub on two by-value <16 x i8> arguments; the checks
; require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
296
; Calls @llvm.x86.xop.vpcomgtud on two by-value <4 x i32> arguments; the checks
; require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
303
; Calls @llvm.x86.xop.vpcomgtuq on two by-value <2 x i64> arguments; the checks
; require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
310
; Calls @llvm.x86.xop.vpcomgtuw on two by-value <8 x i16> arguments; the checks
; require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
317
; Calls @llvm.x86.xop.vpcomgtw on two by-value <8 x i16> arguments; the checks
; require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomgtw:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
324
; Calls @llvm.x86.xop.vpcomleb on two by-value <16 x i8> arguments; the checks
; require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomleb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
331
; Calls @llvm.x86.xop.vpcomled on two by-value <4 x i32> arguments; the checks
; require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomled:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
338
; Calls @llvm.x86.xop.vpcomleq on two by-value <2 x i64> arguments; the checks
; require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomleq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
345
; Calls @llvm.x86.xop.vpcomleub on two by-value <16 x i8> arguments; the checks
; require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomleub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
352
; Calls @llvm.x86.xop.vpcomleud on two by-value <4 x i32> arguments; the checks
; require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomleud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
359
; Calls @llvm.x86.xop.vpcomleuq on two by-value <2 x i64> arguments; the checks
; require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomleuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
366
; Calls @llvm.x86.xop.vpcomleuw on two by-value <8 x i16> arguments; the checks
; require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomleuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
373
; Calls @llvm.x86.xop.vpcomlew on two by-value <8 x i16> arguments; the checks
; require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomlew:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
380
; Calls @llvm.x86.xop.vpcomltb on two by-value <16 x i8> arguments; the checks
; require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
387
; Calls @llvm.x86.xop.vpcomltd on two by-value <4 x i32> arguments; the checks
; require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltd:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
394
; Calls @llvm.x86.xop.vpcomltq on two by-value <2 x i64> arguments; the checks
; require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
401
; Calls @llvm.x86.xop.vpcomltub on two by-value <16 x i8> arguments; the checks
; require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
408
; Calls @llvm.x86.xop.vpcomltud on two by-value <4 x i32> arguments; the checks
; require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
415
; Calls @llvm.x86.xop.vpcomltuq on two by-value <2 x i64> arguments; the checks
; require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
422
; Calls @llvm.x86.xop.vpcomltuw on two by-value <8 x i16> arguments; the checks
; require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
429
; Calls @llvm.x86.xop.vpcomltw on two by-value <8 x i16> arguments; the checks
; require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomltw:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
436
; Calls @llvm.x86.xop.vpcomneb on two by-value <16 x i8> arguments; the checks
; require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomneb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
443
; Calls @llvm.x86.xop.vpcomned on two by-value <4 x i32> arguments; the checks
; require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomned:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
450
; Calls @llvm.x86.xop.vpcomneq on two by-value <2 x i64> arguments; the checks
; require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomneq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
457
; Calls @llvm.x86.xop.vpcomneub on two by-value <16 x i8> arguments; the checks
; require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomneub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
464
; Calls @llvm.x86.xop.vpcomneud on two by-value <4 x i32> arguments; the checks
; require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomneud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
471
; Calls @llvm.x86.xop.vpcomneuq on two by-value <2 x i64> arguments; the checks
; require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomneuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
478
; Calls @llvm.x86.xop.vpcomneuw on two by-value <8 x i16> arguments; the checks
; require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomneuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
485
; Calls @llvm.x86.xop.vpcomnew on two by-value <8 x i16> arguments; the checks
; require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomnew:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
492
; Calls @llvm.x86.xop.vpcomtrueb on two by-value <16 x i8> arguments; the
; checks require a "vpcomb" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrueb:
  ; CHECK: vpcomb
  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
499
; Calls @llvm.x86.xop.vpcomtrued on two by-value <4 x i32> arguments; the
; checks require a "vpcomd" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrued:
  ; CHECK: vpcomd
  %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
506
; Calls @llvm.x86.xop.vpcomtrueq on two by-value <2 x i64> arguments; the
; checks require a "vpcomq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrueq:
  ; CHECK: vpcomq
  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
513
; Calls @llvm.x86.xop.vpcomtrueub on two by-value <16 x i8> arguments; the
; checks require a "vpcomub" match after this function's label in llc's output.
define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrueub:
  ; CHECK: vpcomub
  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
520
; Calls @llvm.x86.xop.vpcomtrueud on two by-value <4 x i32> arguments; the
; checks require a "vpcomud" match after this function's label in llc's output.
define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrueud:
  ; CHECK: vpcomud
  %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
527
; Calls @llvm.x86.xop.vpcomtrueuq on two by-value <2 x i64> arguments; the
; checks require a "vpcomuq" match after this function's label in llc's output.
define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuq:
  ; CHECK: vpcomuq
  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
534
; Calls @llvm.x86.xop.vpcomtrueuw on two by-value <8 x i16> arguments; the
; checks require a "vpcomuw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuw:
  ; CHECK: vpcomuw
  %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
541
; Calls @llvm.x86.xop.vpcomtruew on two by-value <8 x i16> arguments; the
; checks require a "vpcomw" match after this function's label in llc's output.
define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomtruew:
  ; CHECK: vpcomw
  %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
548
; Calls @llvm.x86.xop.vphaddbd on a by-value <16 x i8> argument, returning
; <4 x i32>; the checks require a "vphaddbd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddbd:
  ; CHECK: vphaddbd
  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
555
; Calls @llvm.x86.xop.vphaddbq on a by-value <16 x i8> argument, returning
; <2 x i64>; the checks require a "vphaddbq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddbq:
  ; CHECK: vphaddbq
  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
562
; Calls @llvm.x86.xop.vphaddbw on a by-value <16 x i8> argument, returning
; <8 x i16>; the checks require a "vphaddbw" match after this function's label.
define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddbw:
  ; CHECK: vphaddbw
  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
569
; Calls @llvm.x86.xop.vphadddq on a by-value <4 x i32> argument, returning
; <2 x i64>; the checks require a "vphadddq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphadddq:
  ; CHECK: vphadddq
  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
576
; Calls @llvm.x86.xop.vphaddubd on a by-value <16 x i8> argument, returning
; <4 x i32>; the checks require a "vphaddubd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddubd:
  ; CHECK: vphaddubd
  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
583
; Calls @llvm.x86.xop.vphaddubq on a by-value <16 x i8> argument, returning
; <2 x i64>; the checks require a "vphaddubq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddubq:
  ; CHECK: vphaddubq
  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
590
; Calls @llvm.x86.xop.vphaddubw on a by-value <16 x i8> argument, returning
; <8 x i16>; the checks require a "vphaddubw" match after this function's label.
define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddubw:
  ; CHECK: vphaddubw
  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
597
; Calls @llvm.x86.xop.vphaddudq on a by-value <4 x i32> argument, returning
; <2 x i64>; the checks require a "vphaddudq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddudq:
  ; CHECK: vphaddudq
  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
604
; Calls @llvm.x86.xop.vphadduwd on a by-value <8 x i16> argument, returning
; <4 x i32>; the checks require a "vphadduwd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphadduwd:
  ; CHECK: vphadduwd
  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
611
; Calls @llvm.x86.xop.vphadduwq on a by-value <8 x i16> argument, returning
; <2 x i64>; the checks require a "vphadduwq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphadduwq:
  ; CHECK: vphadduwq
  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
618
; Calls @llvm.x86.xop.vphaddwd on a by-value <8 x i16> argument, returning
; <4 x i32>; the checks require a "vphaddwd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddwd:
  ; CHECK: vphaddwd
  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
625
; Calls @llvm.x86.xop.vphaddwq on a by-value <8 x i16> argument, returning
; <2 x i64>; the checks require a "vphaddwq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphaddwq:
  ; CHECK: vphaddwq
  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
632
; Calls @llvm.x86.xop.vphsubbw on a by-value <16 x i8> argument, returning
; <8 x i16>; the checks require a "vphsubbw" match after this function's label.
define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphsubbw:
  ; CHECK: vphsubbw
  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
639
; Calls @llvm.x86.xop.vphsubdq on a by-value <4 x i32> argument, returning
; <2 x i64>; the checks require a "vphsubdq" match after this function's label.
define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphsubdq:
  ; CHECK: vphsubdq
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0)
  ret <2 x i64> %res
}
; Calls @llvm.x86.xop.vphsubdq with its operand loaded through pointer %a0.
; CHECK-NOT rejects any "vmovaps" between this function's label and the
; "vphsubdq" match (presumably a load-folding check -- confirm).
define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphsubdq_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vphsubdq
  %vec = load <4 x i32>* %a0
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
653
; Calls @llvm.x86.xop.vphsubwd on a by-value <8 x i16> argument, returning
; <4 x i32>; the checks require a "vphsubwd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphsubwd:
  ; CHECK: vphsubwd
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
; Calls @llvm.x86.xop.vphsubwd with its operand loaded through pointer %a0.
; CHECK-NOT rejects any "vmovaps" between this function's label and the
; "vphsubwd" match (presumably a load-folding check -- confirm).
define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vphsubwd_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vphsubwd
  %vec = load <8 x i16>* %a0
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
667
; Calls @llvm.x86.xop.vpmacsdd on three by-value <4 x i32> arguments; the
; checks require a "vpmacsdd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacsdd:
  ; CHECK: vpmacsdd
  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
674
; Calls @llvm.x86.xop.vpmacsdqh on two <4 x i32> arguments and one <2 x i64>
; argument; the checks require a "vpmacsdqh" match after this function's label.
define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacsdqh:
  ; CHECK: vpmacsdqh
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
681
; Calls @llvm.x86.xop.vpmacsdql on two <4 x i32> arguments and one <2 x i64>
; argument; the checks require a "vpmacsdql" match after this function's label.
define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacsdql:
  ; CHECK: vpmacsdql
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
688
; Calls @llvm.x86.xop.vpmacssdd on three by-value <4 x i32> arguments; the
; checks require a "vpmacssdd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacssdd:
  ; CHECK: vpmacssdd
  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
695
; Calls @llvm.x86.xop.vpmacssdqh on two <4 x i32> arguments and one <2 x i64>
; argument; the checks require a "vpmacssdqh" match after this function's label.
define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacssdqh:
  ; CHECK: vpmacssdqh
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
702
; Calls @llvm.x86.xop.vpmacssdql on two <4 x i32> arguments and one <2 x i64>
; argument; the checks require a "vpmacssdql" match after this function's label.
define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacssdql:
  ; CHECK: vpmacssdql
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
709
; Calls @llvm.x86.xop.vpmacsswd on two <8 x i16> arguments and one <4 x i32>
; argument; the checks require a "vpmacsswd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacsswd:
  ; CHECK: vpmacsswd
  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
716
; Calls @llvm.x86.xop.vpmacssww on three by-value <8 x i16> arguments; the
; checks require a "vpmacssww" match after this function's label.
define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacssww:
  ; CHECK: vpmacssww
  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
723
; Calls @llvm.x86.xop.vpmacswd on two <8 x i16> arguments and one <4 x i32>
; argument; the checks require a "vpmacswd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacswd:
  ; CHECK: vpmacswd
  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
730
; Calls @llvm.x86.xop.vpmacsww on three by-value <8 x i16> arguments; the
; checks require a "vpmacsww" match after this function's label.
define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmacsww:
  ; CHECK: vpmacsww
  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
737
; Calls @llvm.x86.xop.vpmadcsswd on two <8 x i16> arguments and one <4 x i32>
; argument; the checks require a "vpmadcsswd" match after this function's label.
define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmadcsswd:
  ; CHECK: vpmadcsswd
  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
744
; Register form: verifies that @llvm.x86.xop.vpmadcswd is selected as vpmadcswd.
; The label directive plus the register-operand requirement keep the pattern
; from matching the function's own symbol name, which contains the mnemonic.
define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmadcswd:
  ; CHECK: vpmadcswd {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %res
}
; Memory form: the second operand is loaded from %a1. The negative check
; requires that no vmovaps appears between the function label and the
; vpmadcswd instruction.
define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpmadcswd_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpmadcswd {{.*}}%xmm
  %vec = load <8 x i16>* %a1
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
758
; Register form: verifies that @llvm.x86.xop.vpperm is selected as vpperm.
; The label directive plus the register-operand requirement keep the pattern
; from matching the function's own symbol name, which contains the mnemonic.
define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpperm:
  ; CHECK: vpperm {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
  ret <16 x i8> %res
}
; Third operand loaded from memory (%a2). The negative check requires that no
; vmovaps appears between the function label and the vpperm instruction.
define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpperm_rm:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpperm {{.*}}%xmm
  %vec = load <16 x i8>* %a2
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec)
  ret <16 x i8> %res
}
; Second operand loaded from memory (%a1). The negative check requires that no
; vmovaps appears between the function label and the vpperm instruction.
define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
  ; CHECK-LABEL: test_int_x86_xop_vpperm_mr:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpperm {{.*}}%xmm
  %vec = load <16 x i8>* %a1
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
779
; Verifies that @llvm.x86.xop.vprotb is selected as a vprotb instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vprotb:
  ; CHECK: vprotb {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
786
; Verifies that @llvm.x86.xop.vprotd is selected as a vprotd instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vprotd:
  ; CHECK: vprotd {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
793
; Verifies that @llvm.x86.xop.vprotq is selected as a vprotq instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vprotq:
  ; CHECK: vprotq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
800
; Verifies that @llvm.x86.xop.vprotw is selected as a vprotw instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vprotw:
  ; CHECK: vprotw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
807
; Verifies that @llvm.x86.xop.vpshab is selected as a vpshab instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshab:
  ; CHECK: vpshab {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
814
; Verifies that @llvm.x86.xop.vpshad is selected as a vpshad instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshad:
  ; CHECK: vpshad {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
821
; Verifies that @llvm.x86.xop.vpshaq is selected as a vpshaq instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshaq:
  ; CHECK: vpshaq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
828
; Verifies that @llvm.x86.xop.vpshaw is selected as a vpshaw instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshaw:
  ; CHECK: vpshaw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
835
; Verifies that @llvm.x86.xop.vpshlb is selected as a vpshlb instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshlb:
  ; CHECK: vpshlb {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
842
; Verifies that @llvm.x86.xop.vpshld is selected as a vpshld instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshld:
  ; CHECK: vpshld {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
849
; Verifies that @llvm.x86.xop.vpshlq is selected as a vpshlq instruction.
; The label directive scopes the match to this function, and the register
; operand requirement keeps the pattern from matching the function's own symbol
; name (which contains the mnemonic) in assembler directives.
define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshlq:
  ; CHECK: vpshlq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
856
; Register form: verifies that @llvm.x86.xop.vpshlw is selected as vpshlw.
; The label directive plus the register-operand requirement keep the pattern
; from matching the function's own symbol name, which contains the mnemonic.
define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshlw:
  ; CHECK: vpshlw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
; Second operand loaded from memory (%a1). The negative check requires that no
; vmovaps appears between the function label and the vpshlw instruction.
define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshlw_rm:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpshlw {{.*}}%xmm
  %vec = load <8 x i16>* %a1
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec)
  ret <8 x i16> %res
}
; First operand loaded from memory (%a0). The negative check requires that no
; vmovaps appears between the function label and the vpshlw instruction.
define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpshlw_mr:
  ; CHECK-NOT: vmovaps
  ; CHECK: vpshlw {{.*}}%xmm
  %vec = load <8 x i16>* %a0
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
877
; Register form: verifies that @llvm.x86.xop.vfrcz.ss is selected as vfrczss.
; The label directive scopes both the negative `mov` check and the positive
; check to this function's output rather than to whatever preceded it.
define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_ss:
  ; CHECK-NOT: mov
  ; CHECK: vfrczss
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
  ret <4 x float> %res
}
; Memory form: a scalar float is loaded and inserted into lane 0 of an undef
; vector. The negative check requires that nothing containing "mov" appears
; between the function label and the vfrczss instruction.
define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_ss_mem:
  ; CHECK-NOT: mov
  ; CHECK: vfrczss
  %elem = load float* %a0
  %vec = insertelement <4 x float> undef, float %elem, i32 0
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
893
; Register form: verifies that @llvm.x86.xop.vfrcz.sd is selected as vfrczsd.
; The label directive scopes both the negative `mov` check and the positive
; check to this function's output rather than to whatever preceded it.
define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_sd:
  ; CHECK-NOT: mov
  ; CHECK: vfrczsd
  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
  ret <2 x double> %res
}
; Memory form: a scalar double is loaded and inserted into lane 0 of an undef
; vector. The negative check requires that nothing containing "mov" appears
; between the function label and the vfrczsd instruction.
define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_sd_mem:
  ; CHECK-NOT: mov
  ; CHECK: vfrczsd
  %elem = load double* %a0
  %vec = insertelement <2 x double> undef, double %elem, i32 0
  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
909
; Register form: verifies that @llvm.x86.xop.vfrcz.pd is selected as vfrczpd.
; The label directive scopes the check to this function's output.
define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd:
  ; CHECK: vfrczpd
  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
  ret <2 x double> %res
}
; Memory form: the operand is loaded from %a0. The negative check requires
; that no vmovaps appears between the function label and the vfrczpd.
define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vfrczpd
  %vec = load <2 x double>* %a0
  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
923
; 256-bit register form: verifies that @llvm.x86.xop.vfrcz.pd.256 is selected
; as vfrczpd and that a ymm register follows. The label directive keeps the
; `ymm` check from being satisfied by a later function's output.
define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256:
  ; CHECK: vfrczpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
  ret <4 x double> %res
}
; 256-bit memory form: the operand is loaded from %a0. The negative check
; requires that no vmovaps appears between the function label and the vfrczpd.
define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vfrczpd
  ; CHECK: ymm
  %vec = load <4 x double>* %a0
  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
939
; Register form: verifies that @llvm.x86.xop.vfrcz.ps is selected as vfrczps.
; The label directive scopes the check to this function's output.
define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps:
  ; CHECK: vfrczps
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
  ret <4 x float> %res
}
; Memory form: the operand is loaded from %a0. The negative check requires
; that no vmovaps appears between the function label and the vfrczps.
define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vfrczps
  %vec = load <4 x float>* %a0
  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
953
; 256-bit register form: verifies that @llvm.x86.xop.vfrcz.ps.256 is selected
; as vfrczps and that a ymm register follows. The label directive keeps the
; `ymm` check from being satisfied by a later function's output.
define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256:
  ; CHECK: vfrczps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
  ret <8 x float> %res
}
; 256-bit memory form: the operand is loaded from %a0. The negative check
; requires that no vmovaps appears between the function label and the vfrczps.
define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
  ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256_mem:
  ; CHECK-NOT: vmovaps
  ; CHECK: vfrczps
  ; CHECK: ymm
  %vec = load <8 x float>* %a0
  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
969
; Verifies that @llvm.x86.xop.vpcomb with immediate 0 is selected as a signed
; byte vpcom instruction. The pattern accepts both the plain mnemonic and the
; "lt" condition alias, and requires a register operand so it cannot match the
; function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltb by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomb:
  ; CHECK: vpcom{{(lt)?}}b {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
976
; Verifies that @llvm.x86.xop.vpcomw with immediate 0 is selected as a signed
; word vpcom instruction. The pattern accepts both the plain mnemonic and the
; "lt" condition alias, and requires a register operand so it cannot match the
; function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltw by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomw:
  ; CHECK: vpcom{{(lt)?}}w {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
983
; Verifies that @llvm.x86.xop.vpcomd with immediate 0 is selected as a signed
; doubleword vpcom instruction. The pattern accepts both the plain mnemonic and
; the "lt" condition alias, and requires a register operand so it cannot match
; the function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltd by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomd:
  ; CHECK: vpcom{{(lt)?}}d {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
990
; Verifies that @llvm.x86.xop.vpcomq with immediate 0 is selected as a signed
; quadword vpcom instruction. The pattern accepts both the plain mnemonic and
; the "lt" condition alias, and requires a register operand so it cannot match
; the function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltq by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomq:
  ; CHECK: vpcom{{(lt)?}}q {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
997
; Verifies that @llvm.x86.xop.vpcomub with immediate 0 is selected as an
; unsigned byte vpcom instruction. The pattern accepts both the plain mnemonic
; and the "lt" condition alias, and requires a register operand so it cannot
; match the function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltub by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomub:
  ; CHECK: vpcom{{(lt)?}}ub {{.*}}%xmm
  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
1004
; Verifies that @llvm.x86.xop.vpcomuw with immediate 0 is selected as an
; unsigned word vpcom instruction. The pattern accepts both the plain mnemonic
; and the "lt" condition alias, and requires a register operand so it cannot
; match the function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltuw by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomuw:
  ; CHECK: vpcom{{(lt)?}}uw {{.*}}%xmm
  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
1011
; Verifies that @llvm.x86.xop.vpcomud with immediate 0 is selected as an
; unsigned doubleword vpcom instruction. The pattern accepts both the plain
; mnemonic and the "lt" condition alias, and requires a register operand so it
; cannot match the function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltud by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomud:
  ; CHECK: vpcom{{(lt)?}}ud {{.*}}%xmm
  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
1018
; Verifies that @llvm.x86.xop.vpcomuq with immediate 0 is selected as an
; unsigned quadword vpcom instruction. The pattern accepts both the plain
; mnemonic and the "lt" condition alias, and requires a register operand so it
; cannot match the function's own symbol name.
; NOTE(review): immediate 0 is presumably printed as vpcomltuq by llc versions
; that use condition-code aliases; confirm against the llc under test.
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_int_x86_xop_vpcomuq:
  ; CHECK: vpcom{{(lt)?}}uq {{.*}}%xmm
  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
1025
1026