1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/xop-builtins.c
6
; Bitcasts all three <2 x i64> arguments to <8 x i16>, calls
; @llvm.x86.xop.vpmacssww, and expects a single vpmacssww instruction in both
; the X32 and X64 check blocks.
7define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
8; X32-LABEL: test_mm_maccs_epi16:
9; X32:       # %bb.0:
10; X32-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
11; X32-NEXT:    retl
12;
13; X64-LABEL: test_mm_maccs_epi16:
14; X64:       # %bb.0:
15; X64-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
16; X64-NEXT:    retq
17  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
18  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
19  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
20  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
21  %bc = bitcast <8 x i16> %res to <2 x i64>
22  ret <2 x i64> %bc
23}
24declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
25
; Bitcasts all three <2 x i64> arguments to <8 x i16>, calls
; @llvm.x86.xop.vpmacsww, and expects a single vpmacsww instruction in both
; the X32 and X64 check blocks.
26define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
27; X32-LABEL: test_mm_macc_epi16:
28; X32:       # %bb.0:
29; X32-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
30; X32-NEXT:    retl
31;
32; X64-LABEL: test_mm_macc_epi16:
33; X64:       # %bb.0:
34; X64-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
35; X64-NEXT:    retq
36  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
37  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
38  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
39  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
40  %bc = bitcast <8 x i16> %res to <2 x i64>
41  ret <2 x i64> %bc
42}
43declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
44
; Bitcasts %a0 and %a1 to <8 x i16> and %a2 to <4 x i32>, calls
; @llvm.x86.xop.vpmacsswd, and expects a single vpmacsswd instruction in both
; the X32 and X64 check blocks.
45define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
46; X32-LABEL: test_mm_maccsd_epi16:
47; X32:       # %bb.0:
48; X32-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
49; X32-NEXT:    retl
50;
51; X64-LABEL: test_mm_maccsd_epi16:
52; X64:       # %bb.0:
53; X64-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
54; X64-NEXT:    retq
55  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
56  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
57  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
58  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
59  %bc = bitcast <4 x i32> %res to <2 x i64>
60  ret <2 x i64> %bc
61}
62declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
63
; Bitcasts %a0 and %a1 to <8 x i16> and %a2 to <4 x i32>, calls
; @llvm.x86.xop.vpmacswd, and expects a single vpmacswd instruction in both
; the X32 and X64 check blocks.
64define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
65; X32-LABEL: test_mm_maccd_epi16:
66; X32:       # %bb.0:
67; X32-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
68; X32-NEXT:    retl
69;
70; X64-LABEL: test_mm_maccd_epi16:
71; X64:       # %bb.0:
72; X64-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
73; X64-NEXT:    retq
74  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
75  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
76  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
77  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
78  %bc = bitcast <4 x i32> %res to <2 x i64>
79  ret <2 x i64> %bc
80}
81declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
82
; Bitcasts all three <2 x i64> arguments to <4 x i32>, calls
; @llvm.x86.xop.vpmacssdd, and expects a single vpmacssdd instruction in both
; the X32 and X64 check blocks.
83define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
84; X32-LABEL: test_mm_maccs_epi32:
85; X32:       # %bb.0:
86; X32-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
87; X32-NEXT:    retl
88;
89; X64-LABEL: test_mm_maccs_epi32:
90; X64:       # %bb.0:
91; X64-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
92; X64-NEXT:    retq
93  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
94  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
95  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
96  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
97  %bc = bitcast <4 x i32> %res to <2 x i64>
98  ret <2 x i64> %bc
99}
100declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
101
; Bitcasts all three <2 x i64> arguments to <4 x i32>, calls
; @llvm.x86.xop.vpmacsdd, and expects a single vpmacsdd instruction in both
; the X32 and X64 check blocks.
102define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
103; X32-LABEL: test_mm_macc_epi32:
104; X32:       # %bb.0:
105; X32-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
106; X32-NEXT:    retl
107;
108; X64-LABEL: test_mm_macc_epi32:
109; X64:       # %bb.0:
110; X64-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
111; X64-NEXT:    retq
112  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
113  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
114  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
115  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
116  %bc = bitcast <4 x i32> %res to <2 x i64>
117  ret <2 x i64> %bc
118}
119declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
120
; Bitcasts %a0 and %a1 to <4 x i32> (%a2 is passed unchanged as <2 x i64>),
; calls @llvm.x86.xop.vpmacssdql, and expects a single vpmacssdql instruction
; in both the X32 and X64 check blocks.
121define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
122; X32-LABEL: test_mm_maccslo_epi32:
123; X32:       # %bb.0:
124; X32-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
125; X32-NEXT:    retl
126;
127; X64-LABEL: test_mm_maccslo_epi32:
128; X64:       # %bb.0:
129; X64-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
130; X64-NEXT:    retq
131  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
132  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
133  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
134  ret <2 x i64> %res
135}
136declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
137
; Bitcasts %a0 and %a1 to <4 x i32> (%a2 is passed unchanged as <2 x i64>),
; calls @llvm.x86.xop.vpmacsdql, and expects a single vpmacsdql instruction
; in both the X32 and X64 check blocks.
138define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
139; X32-LABEL: test_mm_macclo_epi32:
140; X32:       # %bb.0:
141; X32-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
142; X32-NEXT:    retl
143;
144; X64-LABEL: test_mm_macclo_epi32:
145; X64:       # %bb.0:
146; X64-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
147; X64-NEXT:    retq
148  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
149  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
150  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
151  ret <2 x i64> %res
152}
153declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
154
; Bitcasts %a0 and %a1 to <4 x i32> (%a2 is passed unchanged as <2 x i64>),
; calls @llvm.x86.xop.vpmacssdqh, and expects a single vpmacssdqh instruction
; in both the X32 and X64 check blocks.
155define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
156; X32-LABEL: test_mm_maccshi_epi32:
157; X32:       # %bb.0:
158; X32-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
159; X32-NEXT:    retl
160;
161; X64-LABEL: test_mm_maccshi_epi32:
162; X64:       # %bb.0:
163; X64-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
164; X64-NEXT:    retq
165  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
166  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
167  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
168  ret <2 x i64> %res
169}
170declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
171
; Bitcasts %a0 and %a1 to <4 x i32> (%a2 is passed unchanged as <2 x i64>),
; calls @llvm.x86.xop.vpmacsdqh, and expects a single vpmacsdqh instruction
; in both the X32 and X64 check blocks.
172define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
173; X32-LABEL: test_mm_macchi_epi32:
174; X32:       # %bb.0:
175; X32-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
176; X32-NEXT:    retl
177;
178; X64-LABEL: test_mm_macchi_epi32:
179; X64:       # %bb.0:
180; X64-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
181; X64-NEXT:    retq
182  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
183  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
184  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
185  ret <2 x i64> %res
186}
187declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
188
; Bitcasts %a0 and %a1 to <8 x i16> and %a2 to <4 x i32>, calls
; @llvm.x86.xop.vpmadcsswd, and expects a single vpmadcsswd instruction in
; both the X32 and X64 check blocks.
189define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
190; X32-LABEL: test_mm_maddsd_epi16:
191; X32:       # %bb.0:
192; X32-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
193; X32-NEXT:    retl
194;
195; X64-LABEL: test_mm_maddsd_epi16:
196; X64:       # %bb.0:
197; X64-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
198; X64-NEXT:    retq
199  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
200  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
201  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
202  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
203  %bc = bitcast <4 x i32> %res to <2 x i64>
204  ret <2 x i64> %bc
205}
206declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
207
; Bitcasts %a0 and %a1 to <8 x i16> and %a2 to <4 x i32>, calls
; @llvm.x86.xop.vpmadcswd, and expects a single vpmadcswd instruction in
; both the X32 and X64 check blocks.
208define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
209; X32-LABEL: test_mm_maddd_epi16:
210; X32:       # %bb.0:
211; X32-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
212; X32-NEXT:    retl
213;
214; X64-LABEL: test_mm_maddd_epi16:
215; X64:       # %bb.0:
216; X64-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
217; X64-NEXT:    retq
218  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
219  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
220  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
221  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
222  %bc = bitcast <4 x i32> %res to <2 x i64>
223  ret <2 x i64> %bc
224}
225declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
226
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphaddbw (result <8 x i16>),
; and expects a single vphaddbw instruction in both the X32 and X64 check
; blocks.
227define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
228; X32-LABEL: test_mm_haddw_epi8:
229; X32:       # %bb.0:
230; X32-NEXT:    vphaddbw %xmm0, %xmm0
231; X32-NEXT:    retl
232;
233; X64-LABEL: test_mm_haddw_epi8:
234; X64:       # %bb.0:
235; X64-NEXT:    vphaddbw %xmm0, %xmm0
236; X64-NEXT:    retq
237  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
238  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %arg0)
239  %bc = bitcast <8 x i16> %res to <2 x i64>
240  ret <2 x i64> %bc
241}
242declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
243
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphaddbd (result <4 x i32>),
; and expects a single vphaddbd instruction in both the X32 and X64 check
; blocks.
244define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
245; X32-LABEL: test_mm_haddd_epi8:
246; X32:       # %bb.0:
247; X32-NEXT:    vphaddbd %xmm0, %xmm0
248; X32-NEXT:    retl
249;
250; X64-LABEL: test_mm_haddd_epi8:
251; X64:       # %bb.0:
252; X64-NEXT:    vphaddbd %xmm0, %xmm0
253; X64-NEXT:    retq
254  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
255  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %arg0)
256  %bc = bitcast <4 x i32> %res to <2 x i64>
257  ret <2 x i64> %bc
258}
259declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
260
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphaddbq (result <2 x i64>,
; returned directly), and expects a single vphaddbq instruction in both the
; X32 and X64 check blocks.
261define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
262; X32-LABEL: test_mm_haddq_epi8:
263; X32:       # %bb.0:
264; X32-NEXT:    vphaddbq %xmm0, %xmm0
265; X32-NEXT:    retl
266;
267; X64-LABEL: test_mm_haddq_epi8:
268; X64:       # %bb.0:
269; X64-NEXT:    vphaddbq %xmm0, %xmm0
270; X64-NEXT:    retq
271  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
272  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %arg0)
273  ret <2 x i64> %res
274}
275declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
276
; Bitcasts %a0 to <8 x i16>, calls @llvm.x86.xop.vphaddwd (result <4 x i32>),
; and expects a single vphaddwd instruction in both the X32 and X64 check
; blocks.
277define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
278; X32-LABEL: test_mm_haddd_epi16:
279; X32:       # %bb.0:
280; X32-NEXT:    vphaddwd %xmm0, %xmm0
281; X32-NEXT:    retl
282;
283; X64-LABEL: test_mm_haddd_epi16:
284; X64:       # %bb.0:
285; X64-NEXT:    vphaddwd %xmm0, %xmm0
286; X64-NEXT:    retq
287  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
288  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %arg0)
289  %bc = bitcast <4 x i32> %res to <2 x i64>
290  ret <2 x i64> %bc
291}
292declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
293
; Bitcasts %a0 to <8 x i16>, calls @llvm.x86.xop.vphaddwq (result <2 x i64>,
; returned directly), and expects a single vphaddwq instruction in both the
; X32 and X64 check blocks.
294define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
295; X32-LABEL: test_mm_haddq_epi16:
296; X32:       # %bb.0:
297; X32-NEXT:    vphaddwq %xmm0, %xmm0
298; X32-NEXT:    retl
299;
300; X64-LABEL: test_mm_haddq_epi16:
301; X64:       # %bb.0:
302; X64-NEXT:    vphaddwq %xmm0, %xmm0
303; X64-NEXT:    retq
304  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
305  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %arg0)
306  ret <2 x i64> %res
307}
308declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
309
; Bitcasts %a0 to <4 x i32>, calls @llvm.x86.xop.vphadddq (result <2 x i64>,
; returned directly), and expects a single vphadddq instruction in both the
; X32 and X64 check blocks.
310define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
311; X32-LABEL: test_mm_haddq_epi32:
312; X32:       # %bb.0:
313; X32-NEXT:    vphadddq %xmm0, %xmm0
314; X32-NEXT:    retl
315;
316; X64-LABEL: test_mm_haddq_epi32:
317; X64:       # %bb.0:
318; X64-NEXT:    vphadddq %xmm0, %xmm0
319; X64-NEXT:    retq
320  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
321  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %arg0)
322  ret <2 x i64> %res
323}
324declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
325
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphaddubw (result <8 x i16>),
; and expects a single vphaddubw instruction in both the X32 and X64 check
; blocks.
326define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
327; X32-LABEL: test_mm_haddw_epu8:
328; X32:       # %bb.0:
329; X32-NEXT:    vphaddubw %xmm0, %xmm0
330; X32-NEXT:    retl
331;
332; X64-LABEL: test_mm_haddw_epu8:
333; X64:       # %bb.0:
334; X64-NEXT:    vphaddubw %xmm0, %xmm0
335; X64-NEXT:    retq
336  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
337  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %arg0)
338  %bc = bitcast <8 x i16> %res to <2 x i64>
339  ret <2 x i64> %bc
340}
341declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
342
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphaddubd (result <4 x i32>),
; and expects a single vphaddubd instruction in both the X32 and X64 check
; blocks.
343define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
344; X32-LABEL: test_mm_haddd_epu8:
345; X32:       # %bb.0:
346; X32-NEXT:    vphaddubd %xmm0, %xmm0
347; X32-NEXT:    retl
348;
349; X64-LABEL: test_mm_haddd_epu8:
350; X64:       # %bb.0:
351; X64-NEXT:    vphaddubd %xmm0, %xmm0
352; X64-NEXT:    retq
353  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
354  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %arg0)
355  %bc = bitcast <4 x i32> %res to <2 x i64>
356  ret <2 x i64> %bc
357}
358declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
359
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphaddubq (result <2 x i64>,
; returned directly), and expects a single vphaddubq instruction in both the
; X32 and X64 check blocks.
360define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
361; X32-LABEL: test_mm_haddq_epu8:
362; X32:       # %bb.0:
363; X32-NEXT:    vphaddubq %xmm0, %xmm0
364; X32-NEXT:    retl
365;
366; X64-LABEL: test_mm_haddq_epu8:
367; X64:       # %bb.0:
368; X64-NEXT:    vphaddubq %xmm0, %xmm0
369; X64-NEXT:    retq
370  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
371  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %arg0)
372  ret <2 x i64> %res
373}
374declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
375
; Bitcasts %a0 to <8 x i16>, calls @llvm.x86.xop.vphadduwd (result <4 x i32>),
; and expects a single vphadduwd instruction in both the X32 and X64 check
; blocks.
376define <2 x i64> @test_mm_haddd_epu16(<2 x i64> %a0) {
377; X32-LABEL: test_mm_haddd_epu16:
378; X32:       # %bb.0:
379; X32-NEXT:    vphadduwd %xmm0, %xmm0
380; X32-NEXT:    retl
381;
382; X64-LABEL: test_mm_haddd_epu16:
383; X64:       # %bb.0:
384; X64-NEXT:    vphadduwd %xmm0, %xmm0
385; X64-NEXT:    retq
386  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
387  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %arg0)
388  %bc = bitcast <4 x i32> %res to <2 x i64>
389  ret <2 x i64> %bc
390}
391declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
392
393
; Bitcasts %a0 to <8 x i16>, calls @llvm.x86.xop.vphadduwq (result <2 x i64>,
; returned directly), and expects a single vphadduwq instruction in both the
; X32 and X64 check blocks.
394define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
395; X32-LABEL: test_mm_haddq_epu16:
396; X32:       # %bb.0:
397; X32-NEXT:    vphadduwq %xmm0, %xmm0
398; X32-NEXT:    retl
399;
400; X64-LABEL: test_mm_haddq_epu16:
401; X64:       # %bb.0:
402; X64-NEXT:    vphadduwq %xmm0, %xmm0
403; X64-NEXT:    retq
404  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
405  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %arg0)
406  ret <2 x i64> %res
407}
408declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
409
; Bitcasts %a0 to <4 x i32>, calls @llvm.x86.xop.vphaddudq (result <2 x i64>,
; returned directly), and expects a single vphaddudq instruction in both the
; X32 and X64 check blocks.
410define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
411; X32-LABEL: test_mm_haddq_epu32:
412; X32:       # %bb.0:
413; X32-NEXT:    vphaddudq %xmm0, %xmm0
414; X32-NEXT:    retl
415;
416; X64-LABEL: test_mm_haddq_epu32:
417; X64:       # %bb.0:
418; X64-NEXT:    vphaddudq %xmm0, %xmm0
419; X64-NEXT:    retq
420  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
421  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %arg0)
422  ret <2 x i64> %res
423}
424declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
425
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vphsubbw (result <8 x i16>),
; and expects a single vphsubbw instruction in both the X32 and X64 check
; blocks.
426define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
427; X32-LABEL: test_mm_hsubw_epi8:
428; X32:       # %bb.0:
429; X32-NEXT:    vphsubbw %xmm0, %xmm0
430; X32-NEXT:    retl
431;
432; X64-LABEL: test_mm_hsubw_epi8:
433; X64:       # %bb.0:
434; X64-NEXT:    vphsubbw %xmm0, %xmm0
435; X64-NEXT:    retq
436  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
437  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %arg0)
438  %bc = bitcast <8 x i16> %res to <2 x i64>
439  ret <2 x i64> %bc
440}
441declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
442
; Bitcasts %a0 to <8 x i16>, calls @llvm.x86.xop.vphsubwd (result <4 x i32>),
; and expects a single vphsubwd instruction in both the X32 and X64 check
; blocks.
443define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
444; X32-LABEL: test_mm_hsubd_epi16:
445; X32:       # %bb.0:
446; X32-NEXT:    vphsubwd %xmm0, %xmm0
447; X32-NEXT:    retl
448;
449; X64-LABEL: test_mm_hsubd_epi16:
450; X64:       # %bb.0:
451; X64-NEXT:    vphsubwd %xmm0, %xmm0
452; X64-NEXT:    retq
453  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
454  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %arg0)
455  %bc = bitcast <4 x i32> %res to <2 x i64>
456  ret <2 x i64> %bc
457}
458declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
459
; Bitcasts %a0 to <4 x i32>, calls @llvm.x86.xop.vphsubdq (result <2 x i64>,
; returned directly), and expects a single vphsubdq instruction in both the
; X32 and X64 check blocks.
460define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
461; X32-LABEL: test_mm_hsubq_epi32:
462; X32:       # %bb.0:
463; X32-NEXT:    vphsubdq %xmm0, %xmm0
464; X32-NEXT:    retl
465;
466; X64-LABEL: test_mm_hsubq_epi32:
467; X64:       # %bb.0:
468; X64-NEXT:    vphsubdq %xmm0, %xmm0
469; X64-NEXT:    retq
470  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
471  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %arg0)
472  ret <2 x i64> %res
473}
474declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
475
; Calls @llvm.x86.xop.vpcmov directly on the <2 x i64> arguments (no bitcasts).
; Unlike most tests in this file, the check blocks do not expect an instruction
; named after the intrinsic: they expect a five-instruction logic sequence
; (vpcmpeqd, vpxor, vpand, vpand, vpor), i.e. (%a0 & %a2) | (%a1 & ~%a2), on
; both X32 and X64.
476define <2 x i64> @test_mm_cmov_si128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
477; X32-LABEL: test_mm_cmov_si128:
478; X32:       # %bb.0:
479; X32-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
480; X32-NEXT:    vpxor %xmm3, %xmm2, %xmm3
481; X32-NEXT:    vpand %xmm2, %xmm0, %xmm0
482; X32-NEXT:    vpand %xmm3, %xmm1, %xmm1
483; X32-NEXT:    vpor %xmm1, %xmm0, %xmm0
484; X32-NEXT:    retl
485;
486; X64-LABEL: test_mm_cmov_si128:
487; X64:       # %bb.0:
488; X64-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
489; X64-NEXT:    vpxor %xmm3, %xmm2, %xmm3
490; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
491; X64-NEXT:    vpand %xmm3, %xmm1, %xmm1
492; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
493; X64-NEXT:    retq
494  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
495  ret <2 x i64> %res
496}
497declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
498
; 256-bit variant: calls @llvm.x86.xop.vpcmov.256 directly on the <4 x i64>
; arguments (no bitcasts). The check blocks expect a ymm logic sequence rather
; than an instruction named after the intrinsic: vxorps + vcmptrueps build the
; mask that the following vxorps applies to %a2, then vandps, vandps and vorps
; combine the operands, on both X32 and X64.
499define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
500; X32-LABEL: test_mm256_cmov_si256:
501; X32:       # %bb.0:
502; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
503; X32-NEXT:    vcmptrueps %ymm3, %ymm3, %ymm3
504; X32-NEXT:    vxorps %ymm3, %ymm2, %ymm3
505; X32-NEXT:    vandps %ymm2, %ymm0, %ymm0
506; X32-NEXT:    vandps %ymm3, %ymm1, %ymm1
507; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
508; X32-NEXT:    retl
509;
510; X64-LABEL: test_mm256_cmov_si256:
511; X64:       # %bb.0:
512; X64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
513; X64-NEXT:    vcmptrueps %ymm3, %ymm3, %ymm3
514; X64-NEXT:    vxorps %ymm3, %ymm2, %ymm3
515; X64-NEXT:    vandps %ymm2, %ymm0, %ymm0
516; X64-NEXT:    vandps %ymm3, %ymm1, %ymm1
517; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
518; X64-NEXT:    retq
519  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
520  ret <4 x i64> %res
521}
522declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
523
; Bitcasts all three <2 x i64> arguments to <16 x i8>, calls
; @llvm.x86.xop.vpperm, and expects a single vpperm instruction in both the
; X32 and X64 check blocks.
524define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
525; X32-LABEL: test_mm_perm_epi8:
526; X32:       # %bb.0:
527; X32-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
528; X32-NEXT:    retl
529;
530; X64-LABEL: test_mm_perm_epi8:
531; X64:       # %bb.0:
532; X64-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
533; X64-NEXT:    retq
534  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
535  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
536  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
537  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
538  %bc = bitcast <16 x i8> %res to <2 x i64>
539  ret <2 x i64> %bc
540}
541declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
542
; Bitcasts both <2 x i64> arguments to <16 x i8>, calls @llvm.x86.xop.vprotb
; (register-operand form), and expects a single vprotb instruction in both the
; X32 and X64 check blocks.
543define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
544; X32-LABEL: test_mm_rot_epi8:
545; X32:       # %bb.0:
546; X32-NEXT:    vprotb %xmm1, %xmm0, %xmm0
547; X32-NEXT:    retl
548;
549; X64-LABEL: test_mm_rot_epi8:
550; X64:       # %bb.0:
551; X64-NEXT:    vprotb %xmm1, %xmm0, %xmm0
552; X64-NEXT:    retq
553  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
554  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
555  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %arg0, <16 x i8> %arg1)
556  %bc = bitcast <16 x i8> %res to <2 x i64>
557  ret <2 x i64> %bc
558}
559declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
560
; Bitcasts both <2 x i64> arguments to <8 x i16>, calls @llvm.x86.xop.vprotw
; (register-operand form), and expects a single vprotw instruction in both the
; X32 and X64 check blocks.
561define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
562; X32-LABEL: test_mm_rot_epi16:
563; X32:       # %bb.0:
564; X32-NEXT:    vprotw %xmm1, %xmm0, %xmm0
565; X32-NEXT:    retl
566;
567; X64-LABEL: test_mm_rot_epi16:
568; X64:       # %bb.0:
569; X64-NEXT:    vprotw %xmm1, %xmm0, %xmm0
570; X64-NEXT:    retq
571  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
572  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
573  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %arg0, <8 x i16> %arg1)
574  %bc = bitcast <8 x i16> %res to <2 x i64>
575  ret <2 x i64> %bc
576}
577declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
578
; Bitcasts both <2 x i64> arguments to <4 x i32>, calls @llvm.x86.xop.vprotd
; (register-operand form), and expects a single vprotd instruction in both the
; X32 and X64 check blocks.
579define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
580; X32-LABEL: test_mm_rot_epi32:
581; X32:       # %bb.0:
582; X32-NEXT:    vprotd %xmm1, %xmm0, %xmm0
583; X32-NEXT:    retl
584;
585; X64-LABEL: test_mm_rot_epi32:
586; X64:       # %bb.0:
587; X64-NEXT:    vprotd %xmm1, %xmm0, %xmm0
588; X64-NEXT:    retq
589  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
590  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
591  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %arg0, <4 x i32> %arg1)
592  %bc = bitcast <4 x i32> %res to <2 x i64>
593  ret <2 x i64> %bc
594}
595declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
596
; Calls @llvm.x86.xop.vprotq (register-operand form) directly on the <2 x i64>
; arguments, with no bitcasts, and expects a single vprotq instruction in both
; the X32 and X64 check blocks.
597define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) {
598; X32-LABEL: test_mm_rot_epi64:
599; X32:       # %bb.0:
600; X32-NEXT:    vprotq %xmm1, %xmm0, %xmm0
601; X32-NEXT:    retl
602;
603; X64-LABEL: test_mm_rot_epi64:
604; X64:       # %bb.0:
605; X64-NEXT:    vprotq %xmm1, %xmm0, %xmm0
606; X64-NEXT:    retq
607  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
608  ret <2 x i64> %res
609}
610declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
611
; Bitcasts %a0 to <16 x i8>, calls @llvm.x86.xop.vprotbi with the constant
; i8 1, and expects a single immediate-form "vprotb $1" instruction in both
; the X32 and X64 check blocks.
612define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
613; X32-LABEL: test_mm_roti_epi8:
614; X32:       # %bb.0:
615; X32-NEXT:    vprotb $1, %xmm0, %xmm0
616; X32-NEXT:    retl
617;
618; X64-LABEL: test_mm_roti_epi8:
619; X64:       # %bb.0:
620; X64-NEXT:    vprotb $1, %xmm0, %xmm0
621; X64-NEXT:    retq
622  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
623  %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %arg0, i8 1)
624  %bc = bitcast <16 x i8> %res to <2 x i64>
625  ret <2 x i64> %bc
626}
627declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone
628
; Bitcasts %a0 to <8 x i16>, calls @llvm.x86.xop.vprotwi with the constant
; i8 50, and expects a single immediate-form "vprotw $50" instruction in both
; the X32 and X64 check blocks (the immediate is emitted unchanged).
629define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
630; X32-LABEL: test_mm_roti_epi16:
631; X32:       # %bb.0:
632; X32-NEXT:    vprotw $50, %xmm0, %xmm0
633; X32-NEXT:    retl
634;
635; X64-LABEL: test_mm_roti_epi16:
636; X64:       # %bb.0:
637; X64-NEXT:    vprotw $50, %xmm0, %xmm0
638; X64-NEXT:    retq
639  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
640  %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %arg0, i8 50)
641  %bc = bitcast <8 x i16> %res to <2 x i64>
642  ret <2 x i64> %bc
643}
644declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone
645
; Bitcasts %a0 to <4 x i32>, calls @llvm.x86.xop.vprotdi with the negative
; constant i8 -30, and expects a single immediate-form vprotd instruction in
; both the X32 and X64 check blocks. The check lines show the immediate as
; $226, which is the same 8-bit pattern as -30 printed as an unsigned value.
646define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
647; X32-LABEL: test_mm_roti_epi32:
648; X32:       # %bb.0:
649; X32-NEXT:    vprotd $226, %xmm0, %xmm0
650; X32-NEXT:    retl
651;
652; X64-LABEL: test_mm_roti_epi32:
653; X64:       # %bb.0:
654; X64-NEXT:    vprotd $226, %xmm0, %xmm0
655; X64-NEXT:    retq
656  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
657  %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %arg0, i8 -30)
658  %bc = bitcast <4 x i32> %res to <2 x i64>
659  ret <2 x i64> %bc
660}
661declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone
662
; Calls @llvm.x86.xop.vprotqi on %a0 (no bitcast) with the constant i8 100,
; and expects a single immediate-form "vprotq $100" instruction in both the
; X32 and X64 check blocks (the immediate is emitted unchanged).
663define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) {
664; X32-LABEL: test_mm_roti_epi64:
665; X32:       # %bb.0:
666; X32-NEXT:    vprotq $100, %xmm0, %xmm0
667; X32-NEXT:    retl
668;
669; X64-LABEL: test_mm_roti_epi64:
670; X64:       # %bb.0:
671; X64-NEXT:    vprotq $100, %xmm0, %xmm0
672; X64-NEXT:    retq
673  %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 100)
674  ret <2 x i64> %res
675}
676declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone
677
; Bitcasts both <2 x i64> arguments to <16 x i8>, calls @llvm.x86.xop.vpshlb,
; and expects a single vpshlb instruction in both the X32 and X64 check
; blocks.
678define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
679; X32-LABEL: test_mm_shl_epi8:
680; X32:       # %bb.0:
681; X32-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
682; X32-NEXT:    retl
683;
684; X64-LABEL: test_mm_shl_epi8:
685; X64:       # %bb.0:
686; X64-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
687; X64-NEXT:    retq
688  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
689  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
690  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %arg0, <16 x i8> %arg1)
691  %bc = bitcast <16 x i8> %res to <2 x i64>
692  ret <2 x i64> %bc
693}
694declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
695
; Bitcasts both <2 x i64> arguments to <8 x i16>, calls @llvm.x86.xop.vpshlw,
; and expects a single vpshlw instruction in both the X32 and X64 check
; blocks.
696define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
697; X32-LABEL: test_mm_shl_epi16:
698; X32:       # %bb.0:
699; X32-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
700; X32-NEXT:    retl
701;
702; X64-LABEL: test_mm_shl_epi16:
703; X64:       # %bb.0:
704; X64-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
705; X64-NEXT:    retq
706  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
707  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
708  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %arg0, <8 x i16> %arg1)
709  %bc = bitcast <8 x i16> %res to <2 x i64>
710  ret <2 x i64> %bc
711}
712declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
713
; Bitcasts both <2 x i64> arguments to <4 x i32>, calls @llvm.x86.xop.vpshld,
; and expects a single vpshld instruction in both the X32 and X64 check
; blocks.
714define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
715; X32-LABEL: test_mm_shl_epi32:
716; X32:       # %bb.0:
717; X32-NEXT:    vpshld %xmm1, %xmm0, %xmm0
718; X32-NEXT:    retl
719;
720; X64-LABEL: test_mm_shl_epi32:
721; X64:       # %bb.0:
722; X64-NEXT:    vpshld %xmm1, %xmm0, %xmm0
723; X64-NEXT:    retq
724  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
725  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
726  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %arg0, <4 x i32> %arg1)
727  %bc = bitcast <4 x i32> %res to <2 x i64>
728  ret <2 x i64> %bc
729}
730declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
731
; Calls @llvm.x86.xop.vpshlq directly on the <2 x i64> arguments, with no
; bitcasts, and expects a single vpshlq instruction in both the X32 and X64
; check blocks.
732define <2 x i64> @test_mm_shl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
733; X32-LABEL: test_mm_shl_epi64:
734; X32:       # %bb.0:
735; X32-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
736; X32-NEXT:    retl
737;
738; X64-LABEL: test_mm_shl_epi64:
739; X64:       # %bb.0:
740; X64-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
741; X64-NEXT:    retq
742  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
743  ret <2 x i64> %res
744}
745declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
746
; Bitcasts both <2 x i64> arguments to <16 x i8>, calls @llvm.x86.xop.vpshab,
; and expects a single vpshab instruction in both the X32 and X64 check
; blocks.
747define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
748; X32-LABEL: test_mm_sha_epi8:
749; X32:       # %bb.0:
750; X32-NEXT:    vpshab %xmm1, %xmm0, %xmm0
751; X32-NEXT:    retl
752;
753; X64-LABEL: test_mm_sha_epi8:
754; X64:       # %bb.0:
755; X64-NEXT:    vpshab %xmm1, %xmm0, %xmm0
756; X64-NEXT:    retq
757  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
758  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
759  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %arg0, <16 x i8> %arg1)
760  %bc = bitcast <16 x i8> %res to <2 x i64>
761  ret <2 x i64> %bc
762}
763declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
764
; Bitcasts both <2 x i64> arguments to <8 x i16>, calls @llvm.x86.xop.vpshaw,
; and expects a single vpshaw instruction in both the X32 and X64 check
; blocks.
765define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
766; X32-LABEL: test_mm_sha_epi16:
767; X32:       # %bb.0:
768; X32-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
769; X32-NEXT:    retl
770;
771; X64-LABEL: test_mm_sha_epi16:
772; X64:       # %bb.0:
773; X64-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
774; X64-NEXT:    retq
775  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
776  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
777  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %arg0, <8 x i16> %arg1)
778  %bc = bitcast <8 x i16> %res to <2 x i64>
779  ret <2 x i64> %bc
780}
781declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
782
; Bitcasts both <2 x i64> arguments to <4 x i32>, calls @llvm.x86.xop.vpshad,
; and expects a single vpshad instruction in both the X32 and X64 check
; blocks.
783define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
784; X32-LABEL: test_mm_sha_epi32:
785; X32:       # %bb.0:
786; X32-NEXT:    vpshad %xmm1, %xmm0, %xmm0
787; X32-NEXT:    retl
788;
789; X64-LABEL: test_mm_sha_epi32:
790; X64:       # %bb.0:
791; X64-NEXT:    vpshad %xmm1, %xmm0, %xmm0
792; X64-NEXT:    retq
793  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
794  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
795  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %arg0, <4 x i32> %arg1)
796  %bc = bitcast <4 x i32> %res to <2 x i64>
797  ret <2 x i64> %bc
798}
799declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
800
; Calls @llvm.x86.xop.vpshaq directly on the <2 x i64> arguments, with no
; bitcasts, and expects a single vpshaq instruction in both the X32 and X64
; check blocks.
801define <2 x i64> @test_mm_sha_epi64(<2 x i64> %a0, <2 x i64> %a1) {
802; X32-LABEL: test_mm_sha_epi64:
803; X32:       # %bb.0:
804; X32-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
805; X32-NEXT:    retl
806;
807; X64-LABEL: test_mm_sha_epi64:
808; X64:       # %bb.0:
809; X64-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
810; X64-NEXT:    retq
811  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
812  ret <2 x i64> %res
813}
814declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
815
; Bitcasts both <2 x i64> arguments to <16 x i8> and calls
; @llvm.x86.xop.vpcomub with the constant i8 0. The check blocks expect that
; immediate to be printed as the "lt" mnemonic form, i.e. a single vpcomltub
; instruction on both X32 and X64.
816define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
817; X32-LABEL: test_mm_com_epu8:
818; X32:       # %bb.0:
819; X32-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
820; X32-NEXT:    retl
821;
822; X64-LABEL: test_mm_com_epu8:
823; X64:       # %bb.0:
824; X64-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
825; X64-NEXT:    retq
826  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
827  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
828  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
829  %bc = bitcast <16 x i8> %res to <2 x i64>
830  ret <2 x i64> %bc
831}
832declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
833
; Bitcasts both <2 x i64> arguments to <8 x i16> and calls
; @llvm.x86.xop.vpcomuw with the constant i8 0. The check blocks expect that
; immediate to be printed as the "lt" mnemonic form, i.e. a single vpcomltuw
; instruction on both X32 and X64.
834define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
835; X32-LABEL: test_mm_com_epu16:
836; X32:       # %bb.0:
837; X32-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
838; X32-NEXT:    retl
839;
840; X64-LABEL: test_mm_com_epu16:
841; X64:       # %bb.0:
842; X64-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
843; X64-NEXT:    retq
844  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
845  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
846  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
847  %bc = bitcast <8 x i16> %res to <2 x i64>
848  ret <2 x i64> %bc
849}
850declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
851
; Reinterprets both <2 x i64> inputs as <4 x i32>, feeds them to
; llvm.x86.xop.vpcomud with immediate 0, and casts the result back.
; The checks expect immediate 0 to be printed as the vpcomltud mnemonic.
define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu32:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu32:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %lhs, <4 x i32> %rhs, i8 0)
  %out = bitcast <4 x i32> %cmp to <2 x i64>
  ret <2 x i64> %out
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
869
; Passes the <2 x i64> arguments straight to llvm.x86.xop.vpcomuq with
; immediate 0 (no bitcasts are needed at this element width).
; The checks expect immediate 0 to be printed as the vpcomltuq mnemonic.
define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu64:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu64:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %cmp = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %cmp
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
884
; Reinterprets both <2 x i64> inputs as <16 x i8>, feeds them to
; llvm.x86.xop.vpcomb with immediate 0, and casts the result back.
; The checks expect immediate 0 to be printed as the vpcomltb mnemonic.
define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi8:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi8:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %lhs, <16 x i8> %rhs, i8 0)
  %out = bitcast <16 x i8> %cmp to <2 x i64>
  ret <2 x i64> %out
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
902
; Reinterprets both <2 x i64> inputs as <8 x i16>, feeds them to
; llvm.x86.xop.vpcomw with immediate 0, and casts the result back.
; The checks expect immediate 0 to be printed as the vpcomltw mnemonic.
define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi16:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi16:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %lhs, <8 x i16> %rhs, i8 0)
  %out = bitcast <8 x i16> %cmp to <2 x i64>
  ret <2 x i64> %out
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
920
; Reinterprets both <2 x i64> inputs as <4 x i32>, feeds them to
; llvm.x86.xop.vpcomd with immediate 0, and casts the result back.
; The checks expect immediate 0 to be printed as the vpcomltd mnemonic.
define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi32:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %lhs, <4 x i32> %rhs, i8 0)
  %out = bitcast <4 x i32> %cmp to <2 x i64>
  ret <2 x i64> %out
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
938
; Passes the <2 x i64> arguments straight to llvm.x86.xop.vpcomq with
; immediate 0 (no bitcasts are needed at this element width).
; The checks expect immediate 0 to be printed as the vpcomltq mnemonic.
define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi64:
; X32:       # %bb.0:
; X32-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi64:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %cmp = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %cmp
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
953
; 128-bit double-precision two-source permute: forwards %a0, %a1 and the
; <2 x i64> operand %a2 to llvm.x86.xop.vpermil2pd with immediate 0.
; The checks expect one vpermil2pd with a $0 immediate on xmm registers.
define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_permute2_pd:
; X32:       # %bb.0:
; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_permute2_pd:
; X64:       # %bb.0:
; X64-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %perm = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
  ret <2 x double> %perm
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
968
; 256-bit double-precision two-source permute: forwards %a0, %a1 and the
; <4 x i64> operand %a2 to llvm.x86.xop.vpermil2pd.256 with immediate 0.
; The checks expect one vpermil2pd with a $0 immediate on ymm registers.
define <4 x double> @test_mm256_permute2_pd(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_permute2_pd:
; X32:       # %bb.0:
; X32-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permute2_pd:
; X64:       # %bb.0:
; X64-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %perm = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
  ret <4 x double> %perm
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
983
; 128-bit single-precision two-source permute: %a2 arrives as <2 x i64> and
; is bitcast to the <4 x i32> operand type that llvm.x86.xop.vpermil2ps
; takes, then the intrinsic is called with immediate 0.
; The checks expect one vpermil2ps with a $0 immediate on xmm registers.
define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_permute2_ps:
; X32:       # %bb.0:
; X32-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_permute2_ps:
; X64:       # %bb.0:
; X64-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %sel = bitcast <2 x i64> %a2 to <4 x i32>
  %perm = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %sel, i8 0)
  ret <4 x float> %perm
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
999
; 256-bit single-precision two-source permute: %a2 arrives as <4 x i64> and
; is bitcast to the <8 x i32> operand type that llvm.x86.xop.vpermil2ps.256
; takes, then the intrinsic is called with immediate 0.
; The checks expect one vpermil2ps with a $0 immediate on ymm registers.
define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_permute2_ps:
; X32:       # %bb.0:
; X32-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permute2_ps:
; X64:       # %bb.0:
; X64-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %sel = bitcast <4 x i64> %a2 to <8 x i32>
  %perm = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %sel, i8 0)
  ret <8 x float> %perm
}
; Intrinsic exercised by the test above; the i8 operand is the immediate.
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
1015
; Calls llvm.x86.xop.vfrcz.ss on the single <4 x float> argument and returns
; the result; the checks expect one vfrczss reading and writing xmm0.
define <4 x float> @test_mm_frcz_ss(<4 x float> %a0) {
; X32-LABEL: test_mm_frcz_ss:
; X32:       # %bb.0:
; X32-NEXT:    vfrczss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_ss:
; X64:       # %bb.0:
; X64-NEXT:    vfrczss %xmm0, %xmm0
; X64-NEXT:    retq
  %frac = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
  ret <4 x float> %frac
}
; Intrinsic exercised by the test above.
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
1030
; Calls llvm.x86.xop.vfrcz.sd on the single <2 x double> argument and returns
; the result; the checks expect one vfrczsd reading and writing xmm0.
define <2 x double> @test_mm_frcz_sd(<2 x double> %a0) {
; X32-LABEL: test_mm_frcz_sd:
; X32:       # %bb.0:
; X32-NEXT:    vfrczsd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_sd:
; X64:       # %bb.0:
; X64-NEXT:    vfrczsd %xmm0, %xmm0
; X64-NEXT:    retq
  %frac = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
  ret <2 x double> %frac
}
; Intrinsic exercised by the test above.
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
1045
; Calls llvm.x86.xop.vfrcz.ps on the <4 x float> argument and returns the
; result; the checks expect one vfrczps on xmm0 for both targets.
define <4 x float> @test_mm_frcz_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_frcz_ps:
; X32:       # %bb.0:
; X32-NEXT:    vfrczps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_ps:
; X64:       # %bb.0:
; X64-NEXT:    vfrczps %xmm0, %xmm0
; X64-NEXT:    retq
  %frac = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
  ret <4 x float> %frac
}
; Intrinsic exercised by the test above.
declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
1060
; Calls llvm.x86.xop.vfrcz.pd on the <2 x double> argument and returns the
; result; the checks expect one vfrczpd on xmm0 for both targets.
define <2 x double> @test_mm_frcz_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_frcz_pd:
; X32:       # %bb.0:
; X32-NEXT:    vfrczpd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_frcz_pd:
; X64:       # %bb.0:
; X64-NEXT:    vfrczpd %xmm0, %xmm0
; X64-NEXT:    retq
  %frac = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
  ret <2 x double> %frac
}
; Intrinsic exercised by the test above.
declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
1075
; Calls llvm.x86.xop.vfrcz.ps.256 on the <8 x float> argument and returns the
; result; the checks expect one vfrczps on ymm0 for both targets.
define <8 x float> @test_mm256_frcz_ps(<8 x float> %a0) {
; X32-LABEL: test_mm256_frcz_ps:
; X32:       # %bb.0:
; X32-NEXT:    vfrczps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_frcz_ps:
; X64:       # %bb.0:
; X64-NEXT:    vfrczps %ymm0, %ymm0
; X64-NEXT:    retq
  %frac = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
  ret <8 x float> %frac
}
; Intrinsic exercised by the test above.
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
1090
; Calls llvm.x86.xop.vfrcz.pd.256 on the <4 x double> argument and returns
; the result; the checks expect one vfrczpd on ymm0 for both targets.
define <4 x double> @test_mm256_frcz_pd(<4 x double> %a0) {
; X32-LABEL: test_mm256_frcz_pd:
; X32:       # %bb.0:
; X32-NEXT:    vfrczpd %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_frcz_pd:
; X64:       # %bb.0:
; X64-NEXT:    vfrczpd %ymm0, %ymm0
; X64-NEXT:    retq
  %frac = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
  ret <4 x double> %frac
}
; Intrinsic exercised by the test above.
declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122