1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnowa | FileCheck %s --check-prefix=X64
4
; test_pavgusb: passes two x86_mmx arguments to llvm.x86.3dnow.pavgusb and
; returns the result as <8 x i8>. Both check blocks expect one `pavgusb`
; applied directly to %mm1/%mm0. The i686 checks then store %mm0 through a
; pointer loaded from the stack and return with `retl $4`; the x86_64 checks
; move the result into %xmm0 with movq2dq.
5define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
6; X86-LABEL: test_pavgusb:
7; X86:       # %bb.0: # %entry
8; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
9; X86-NEXT:    pavgusb %mm1, %mm0
10; X86-NEXT:    movq %mm0, (%eax)
11; X86-NEXT:    retl $4
12;
13; X64-LABEL: test_pavgusb:
14; X64:       # %bb.0: # %entry
15; X64-NEXT:    pavgusb %mm1, %mm0
16; X64-NEXT:    movq2dq %mm0, %xmm0
17; X64-NEXT:    retq
18entry:
; %0-%3 bitcast each argument to <8 x i8> and straight back to x86_mmx, so
; the intrinsic receives the same bits as the incoming arguments.
19  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
20  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
21  %2 = bitcast <8 x i8> %0 to x86_mmx
22  %3 = bitcast <8 x i8> %1 to x86_mmx
23  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
24  %5 = bitcast x86_mmx %4 to <8 x i8>
25  ret <8 x i8> %5
26}
27
28declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
29
; test_pf2id: bitcasts a <2 x float> argument to x86_mmx, calls
; llvm.x86.3dnow.pf2id, and returns the result as <2 x i32>. Both check
; blocks expect a single `pf2id`. The i686 checks assemble the operand with
; movd + punpckldq and return the two lanes in %eax/%edx via a stack slot;
; the x86_64 checks use movdq2q on the way in and movq2dq on the way out.
30define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
31; X86-LABEL: test_pf2id:
32; X86:       # %bb.0: # %entry
33; X86-NEXT:    pushl %ebp
34; X86-NEXT:    movl %esp, %ebp
35; X86-NEXT:    andl $-8, %esp
36; X86-NEXT:    subl $8, %esp
37; X86-NEXT:    movd 12(%ebp), %mm0
38; X86-NEXT:    movd 8(%ebp), %mm1
39; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
40; X86-NEXT:    pf2id %mm1, %mm0
41; X86-NEXT:    movq %mm0, (%esp)
42; X86-NEXT:    movl (%esp), %eax
43; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
44; X86-NEXT:    movl %ebp, %esp
45; X86-NEXT:    popl %ebp
46; X86-NEXT:    retl
47;
48; X64-LABEL: test_pf2id:
49; X64:       # %bb.0: # %entry
50; X64-NEXT:    movdq2q %xmm0, %mm0
51; X64-NEXT:    pf2id %mm0, %mm0
52; X64-NEXT:    movq2dq %mm0, %xmm0
53; X64-NEXT:    retq
54entry:
55  %0 = bitcast <2 x float> %a to x86_mmx
56  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
57  %2 = bitcast x86_mmx %1 to <2 x i32>
58  ret <2 x i32> %2
59}
60
61declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
62
; test_pfacc: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfacc, and returns the result as <2 x float>. Both check
; blocks expect a single `pfacc` on MMX registers. The i686 checks build each
; operand with movd + punpckldq and reload the stored result with two flds;
; the x86_64 checks use movdq2q for the operands and return via a stack slot
; and movaps into %xmm0.
63define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
64; X86-LABEL: test_pfacc:
65; X86:       # %bb.0: # %entry
66; X86-NEXT:    pushl %ebp
67; X86-NEXT:    movl %esp, %ebp
68; X86-NEXT:    andl $-8, %esp
69; X86-NEXT:    subl $8, %esp
70; X86-NEXT:    movd 20(%ebp), %mm0
71; X86-NEXT:    movd 16(%ebp), %mm1
72; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
73; X86-NEXT:    movd 12(%ebp), %mm0
74; X86-NEXT:    movd 8(%ebp), %mm2
75; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
76; X86-NEXT:    pfacc %mm1, %mm2
77; X86-NEXT:    movq %mm2, (%esp)
78; X86-NEXT:    flds {{[0-9]+}}(%esp)
79; X86-NEXT:    flds (%esp)
80; X86-NEXT:    movl %ebp, %esp
81; X86-NEXT:    popl %ebp
82; X86-NEXT:    retl
83;
84; X64-LABEL: test_pfacc:
85; X64:       # %bb.0: # %entry
86; X64-NEXT:    movdq2q %xmm1, %mm0
87; X64-NEXT:    movdq2q %xmm0, %mm1
88; X64-NEXT:    pfacc %mm0, %mm1
89; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
90; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
91; X64-NEXT:    retq
92entry:
93  %0 = bitcast <2 x float> %a to x86_mmx
94  %1 = bitcast <2 x float> %b to x86_mmx
95  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
96  %3 = bitcast x86_mmx %2 to <2 x float>
97  ret <2 x float> %3
98}
99
100declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
101
; test_pfadd: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfadd, and returns the result as <2 x float>. Both check
; blocks expect a single `pfadd` on MMX registers. The i686 checks build each
; operand with movd + punpckldq and reload the stored result with two flds;
; the x86_64 checks use movdq2q for the operands and return via a stack slot
; and movaps into %xmm0.
102define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
103; X86-LABEL: test_pfadd:
104; X86:       # %bb.0: # %entry
105; X86-NEXT:    pushl %ebp
106; X86-NEXT:    movl %esp, %ebp
107; X86-NEXT:    andl $-8, %esp
108; X86-NEXT:    subl $8, %esp
109; X86-NEXT:    movd 20(%ebp), %mm0
110; X86-NEXT:    movd 16(%ebp), %mm1
111; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
112; X86-NEXT:    movd 12(%ebp), %mm0
113; X86-NEXT:    movd 8(%ebp), %mm2
114; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
115; X86-NEXT:    pfadd %mm1, %mm2
116; X86-NEXT:    movq %mm2, (%esp)
117; X86-NEXT:    flds {{[0-9]+}}(%esp)
118; X86-NEXT:    flds (%esp)
119; X86-NEXT:    movl %ebp, %esp
120; X86-NEXT:    popl %ebp
121; X86-NEXT:    retl
122;
123; X64-LABEL: test_pfadd:
124; X64:       # %bb.0: # %entry
125; X64-NEXT:    movdq2q %xmm1, %mm0
126; X64-NEXT:    movdq2q %xmm0, %mm1
127; X64-NEXT:    pfadd %mm0, %mm1
128; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
129; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
130; X64-NEXT:    retq
131entry:
132  %0 = bitcast <2 x float> %a to x86_mmx
133  %1 = bitcast <2 x float> %b to x86_mmx
134  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
135  %3 = bitcast x86_mmx %2 to <2 x float>
136  ret <2 x float> %3
137}
138
139declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
140
; test_pfcmpeq: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfcmpeq, and returns the result as <2 x i32>. Both check
; blocks expect a single `pfcmpeq` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and return the two lanes in %eax/%edx
; via a stack slot; the x86_64 checks use movdq2q for the operands and
; movq2dq for the result.
141define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
142; X86-LABEL: test_pfcmpeq:
143; X86:       # %bb.0: # %entry
144; X86-NEXT:    pushl %ebp
145; X86-NEXT:    movl %esp, %ebp
146; X86-NEXT:    andl $-8, %esp
147; X86-NEXT:    subl $8, %esp
148; X86-NEXT:    movd 20(%ebp), %mm0
149; X86-NEXT:    movd 16(%ebp), %mm1
150; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
151; X86-NEXT:    movd 12(%ebp), %mm0
152; X86-NEXT:    movd 8(%ebp), %mm2
153; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
154; X86-NEXT:    pfcmpeq %mm1, %mm2
155; X86-NEXT:    movq %mm2, (%esp)
156; X86-NEXT:    movl (%esp), %eax
157; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
158; X86-NEXT:    movl %ebp, %esp
159; X86-NEXT:    popl %ebp
160; X86-NEXT:    retl
161;
162; X64-LABEL: test_pfcmpeq:
163; X64:       # %bb.0: # %entry
164; X64-NEXT:    movdq2q %xmm1, %mm0
165; X64-NEXT:    movdq2q %xmm0, %mm1
166; X64-NEXT:    pfcmpeq %mm0, %mm1
167; X64-NEXT:    movq2dq %mm1, %xmm0
168; X64-NEXT:    retq
169entry:
170  %0 = bitcast <2 x float> %a to x86_mmx
171  %1 = bitcast <2 x float> %b to x86_mmx
172  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
173  %3 = bitcast x86_mmx %2 to <2 x i32>
174  ret <2 x i32> %3
175}
176
177declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
178
; test_pfcmpge: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfcmpge, and returns the result as <2 x i32>. Both check
; blocks expect a single `pfcmpge` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and return the two lanes in %eax/%edx
; via a stack slot; the x86_64 checks use movdq2q for the operands and
; movq2dq for the result.
179define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
180; X86-LABEL: test_pfcmpge:
181; X86:       # %bb.0: # %entry
182; X86-NEXT:    pushl %ebp
183; X86-NEXT:    movl %esp, %ebp
184; X86-NEXT:    andl $-8, %esp
185; X86-NEXT:    subl $8, %esp
186; X86-NEXT:    movd 20(%ebp), %mm0
187; X86-NEXT:    movd 16(%ebp), %mm1
188; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
189; X86-NEXT:    movd 12(%ebp), %mm0
190; X86-NEXT:    movd 8(%ebp), %mm2
191; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
192; X86-NEXT:    pfcmpge %mm1, %mm2
193; X86-NEXT:    movq %mm2, (%esp)
194; X86-NEXT:    movl (%esp), %eax
195; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
196; X86-NEXT:    movl %ebp, %esp
197; X86-NEXT:    popl %ebp
198; X86-NEXT:    retl
199;
200; X64-LABEL: test_pfcmpge:
201; X64:       # %bb.0: # %entry
202; X64-NEXT:    movdq2q %xmm1, %mm0
203; X64-NEXT:    movdq2q %xmm0, %mm1
204; X64-NEXT:    pfcmpge %mm0, %mm1
205; X64-NEXT:    movq2dq %mm1, %xmm0
206; X64-NEXT:    retq
207entry:
208  %0 = bitcast <2 x float> %a to x86_mmx
209  %1 = bitcast <2 x float> %b to x86_mmx
210  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
211  %3 = bitcast x86_mmx %2 to <2 x i32>
212  ret <2 x i32> %3
213}
214
215declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
216
; test_pfcmpgt: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfcmpgt, and returns the result as <2 x i32>. Both check
; blocks expect a single `pfcmpgt` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and return the two lanes in %eax/%edx
; via a stack slot; the x86_64 checks use movdq2q for the operands and
; movq2dq for the result.
217define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
218; X86-LABEL: test_pfcmpgt:
219; X86:       # %bb.0: # %entry
220; X86-NEXT:    pushl %ebp
221; X86-NEXT:    movl %esp, %ebp
222; X86-NEXT:    andl $-8, %esp
223; X86-NEXT:    subl $8, %esp
224; X86-NEXT:    movd 20(%ebp), %mm0
225; X86-NEXT:    movd 16(%ebp), %mm1
226; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
227; X86-NEXT:    movd 12(%ebp), %mm0
228; X86-NEXT:    movd 8(%ebp), %mm2
229; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
230; X86-NEXT:    pfcmpgt %mm1, %mm2
231; X86-NEXT:    movq %mm2, (%esp)
232; X86-NEXT:    movl (%esp), %eax
233; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
234; X86-NEXT:    movl %ebp, %esp
235; X86-NEXT:    popl %ebp
236; X86-NEXT:    retl
237;
238; X64-LABEL: test_pfcmpgt:
239; X64:       # %bb.0: # %entry
240; X64-NEXT:    movdq2q %xmm1, %mm0
241; X64-NEXT:    movdq2q %xmm0, %mm1
242; X64-NEXT:    pfcmpgt %mm0, %mm1
243; X64-NEXT:    movq2dq %mm1, %xmm0
244; X64-NEXT:    retq
245entry:
246  %0 = bitcast <2 x float> %a to x86_mmx
247  %1 = bitcast <2 x float> %b to x86_mmx
248  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
249  %3 = bitcast x86_mmx %2 to <2 x i32>
250  ret <2 x i32> %3
251}
252
253declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
254
; test_pfmax: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfmax, and returns the result as <2 x float>. Both check
; blocks expect a single `pfmax` on MMX registers. The i686 checks build each
; operand with movd + punpckldq and reload the stored result with two flds;
; the x86_64 checks use movdq2q for the operands and return via a stack slot
; and movaps into %xmm0.
255define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
256; X86-LABEL: test_pfmax:
257; X86:       # %bb.0: # %entry
258; X86-NEXT:    pushl %ebp
259; X86-NEXT:    movl %esp, %ebp
260; X86-NEXT:    andl $-8, %esp
261; X86-NEXT:    subl $8, %esp
262; X86-NEXT:    movd 20(%ebp), %mm0
263; X86-NEXT:    movd 16(%ebp), %mm1
264; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
265; X86-NEXT:    movd 12(%ebp), %mm0
266; X86-NEXT:    movd 8(%ebp), %mm2
267; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
268; X86-NEXT:    pfmax %mm1, %mm2
269; X86-NEXT:    movq %mm2, (%esp)
270; X86-NEXT:    flds {{[0-9]+}}(%esp)
271; X86-NEXT:    flds (%esp)
272; X86-NEXT:    movl %ebp, %esp
273; X86-NEXT:    popl %ebp
274; X86-NEXT:    retl
275;
276; X64-LABEL: test_pfmax:
277; X64:       # %bb.0: # %entry
278; X64-NEXT:    movdq2q %xmm1, %mm0
279; X64-NEXT:    movdq2q %xmm0, %mm1
280; X64-NEXT:    pfmax %mm0, %mm1
281; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
282; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
283; X64-NEXT:    retq
284entry:
285  %0 = bitcast <2 x float> %a to x86_mmx
286  %1 = bitcast <2 x float> %b to x86_mmx
287  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
288  %3 = bitcast x86_mmx %2 to <2 x float>
289  ret <2 x float> %3
290}
291
292declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
293
; test_pfmin: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfmin, and returns the result as <2 x float>. Both check
; blocks expect a single `pfmin` on MMX registers. The i686 checks build each
; operand with movd + punpckldq and reload the stored result with two flds;
; the x86_64 checks use movdq2q for the operands and return via a stack slot
; and movaps into %xmm0.
294define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
295; X86-LABEL: test_pfmin:
296; X86:       # %bb.0: # %entry
297; X86-NEXT:    pushl %ebp
298; X86-NEXT:    movl %esp, %ebp
299; X86-NEXT:    andl $-8, %esp
300; X86-NEXT:    subl $8, %esp
301; X86-NEXT:    movd 20(%ebp), %mm0
302; X86-NEXT:    movd 16(%ebp), %mm1
303; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
304; X86-NEXT:    movd 12(%ebp), %mm0
305; X86-NEXT:    movd 8(%ebp), %mm2
306; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
307; X86-NEXT:    pfmin %mm1, %mm2
308; X86-NEXT:    movq %mm2, (%esp)
309; X86-NEXT:    flds {{[0-9]+}}(%esp)
310; X86-NEXT:    flds (%esp)
311; X86-NEXT:    movl %ebp, %esp
312; X86-NEXT:    popl %ebp
313; X86-NEXT:    retl
314;
315; X64-LABEL: test_pfmin:
316; X64:       # %bb.0: # %entry
317; X64-NEXT:    movdq2q %xmm1, %mm0
318; X64-NEXT:    movdq2q %xmm0, %mm1
319; X64-NEXT:    pfmin %mm0, %mm1
320; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
321; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
322; X64-NEXT:    retq
323entry:
324  %0 = bitcast <2 x float> %a to x86_mmx
325  %1 = bitcast <2 x float> %b to x86_mmx
326  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
327  %3 = bitcast x86_mmx %2 to <2 x float>
328  ret <2 x float> %3
329}
330
331declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
332
; test_pfmul: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfmul, and returns the result as <2 x float>. Both check
; blocks expect a single `pfmul` on MMX registers. The i686 checks build each
; operand with movd + punpckldq and reload the stored result with two flds;
; the x86_64 checks use movdq2q for the operands and return via a stack slot
; and movaps into %xmm0.
333define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
334; X86-LABEL: test_pfmul:
335; X86:       # %bb.0: # %entry
336; X86-NEXT:    pushl %ebp
337; X86-NEXT:    movl %esp, %ebp
338; X86-NEXT:    andl $-8, %esp
339; X86-NEXT:    subl $8, %esp
340; X86-NEXT:    movd 20(%ebp), %mm0
341; X86-NEXT:    movd 16(%ebp), %mm1
342; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
343; X86-NEXT:    movd 12(%ebp), %mm0
344; X86-NEXT:    movd 8(%ebp), %mm2
345; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
346; X86-NEXT:    pfmul %mm1, %mm2
347; X86-NEXT:    movq %mm2, (%esp)
348; X86-NEXT:    flds {{[0-9]+}}(%esp)
349; X86-NEXT:    flds (%esp)
350; X86-NEXT:    movl %ebp, %esp
351; X86-NEXT:    popl %ebp
352; X86-NEXT:    retl
353;
354; X64-LABEL: test_pfmul:
355; X64:       # %bb.0: # %entry
356; X64-NEXT:    movdq2q %xmm1, %mm0
357; X64-NEXT:    movdq2q %xmm0, %mm1
358; X64-NEXT:    pfmul %mm0, %mm1
359; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
360; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
361; X64-NEXT:    retq
362entry:
363  %0 = bitcast <2 x float> %a to x86_mmx
364  %1 = bitcast <2 x float> %b to x86_mmx
365  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
366  %3 = bitcast x86_mmx %2 to <2 x float>
367  ret <2 x float> %3
368}
369
370declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
371
; test_pfrcp: bitcasts a <2 x float> argument to x86_mmx, calls
; llvm.x86.3dnow.pfrcp, and returns the result as <2 x float>. Both check
; blocks expect a single `pfrcp`. The i686 checks assemble the operand with
; movd + punpckldq and reload the stored result with two flds; the x86_64
; checks use movdq2q on the way in and a stack slot + movaps on the way out.
372define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
373; X86-LABEL: test_pfrcp:
374; X86:       # %bb.0: # %entry
375; X86-NEXT:    pushl %ebp
376; X86-NEXT:    movl %esp, %ebp
377; X86-NEXT:    andl $-8, %esp
378; X86-NEXT:    subl $8, %esp
379; X86-NEXT:    movd 12(%ebp), %mm0
380; X86-NEXT:    movd 8(%ebp), %mm1
381; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
382; X86-NEXT:    pfrcp %mm1, %mm0
383; X86-NEXT:    movq %mm0, (%esp)
384; X86-NEXT:    flds {{[0-9]+}}(%esp)
385; X86-NEXT:    flds (%esp)
386; X86-NEXT:    movl %ebp, %esp
387; X86-NEXT:    popl %ebp
388; X86-NEXT:    retl
389;
390; X64-LABEL: test_pfrcp:
391; X64:       # %bb.0: # %entry
392; X64-NEXT:    movdq2q %xmm0, %mm0
393; X64-NEXT:    pfrcp %mm0, %mm0
394; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
395; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
396; X64-NEXT:    retq
397entry:
398  %0 = bitcast <2 x float> %a to x86_mmx
399  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
400  %2 = bitcast x86_mmx %1 to <2 x float>
401  ret <2 x float> %2
402}
403
404declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
405
; test_pfrcpit1: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfrcpit1, and returns the result as <2 x float>. Both check
; blocks expect a single `pfrcpit1` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and reload the stored result with two
; flds; the x86_64 checks use movdq2q for the operands and return via a stack
; slot and movaps into %xmm0.
406define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
407; X86-LABEL: test_pfrcpit1:
408; X86:       # %bb.0: # %entry
409; X86-NEXT:    pushl %ebp
410; X86-NEXT:    movl %esp, %ebp
411; X86-NEXT:    andl $-8, %esp
412; X86-NEXT:    subl $8, %esp
413; X86-NEXT:    movd 20(%ebp), %mm0
414; X86-NEXT:    movd 16(%ebp), %mm1
415; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
416; X86-NEXT:    movd 12(%ebp), %mm0
417; X86-NEXT:    movd 8(%ebp), %mm2
418; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
419; X86-NEXT:    pfrcpit1 %mm1, %mm2
420; X86-NEXT:    movq %mm2, (%esp)
421; X86-NEXT:    flds {{[0-9]+}}(%esp)
422; X86-NEXT:    flds (%esp)
423; X86-NEXT:    movl %ebp, %esp
424; X86-NEXT:    popl %ebp
425; X86-NEXT:    retl
426;
427; X64-LABEL: test_pfrcpit1:
428; X64:       # %bb.0: # %entry
429; X64-NEXT:    movdq2q %xmm1, %mm0
430; X64-NEXT:    movdq2q %xmm0, %mm1
431; X64-NEXT:    pfrcpit1 %mm0, %mm1
432; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
433; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
434; X64-NEXT:    retq
435entry:
436  %0 = bitcast <2 x float> %a to x86_mmx
437  %1 = bitcast <2 x float> %b to x86_mmx
438  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
439  %3 = bitcast x86_mmx %2 to <2 x float>
440  ret <2 x float> %3
441}
442
443declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
444
; test_pfrcpit2: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfrcpit2, and returns the result as <2 x float>. Both check
; blocks expect a single `pfrcpit2` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and reload the stored result with two
; flds; the x86_64 checks use movdq2q for the operands and return via a stack
; slot and movaps into %xmm0.
445define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
446; X86-LABEL: test_pfrcpit2:
447; X86:       # %bb.0: # %entry
448; X86-NEXT:    pushl %ebp
449; X86-NEXT:    movl %esp, %ebp
450; X86-NEXT:    andl $-8, %esp
451; X86-NEXT:    subl $8, %esp
452; X86-NEXT:    movd 20(%ebp), %mm0
453; X86-NEXT:    movd 16(%ebp), %mm1
454; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
455; X86-NEXT:    movd 12(%ebp), %mm0
456; X86-NEXT:    movd 8(%ebp), %mm2
457; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
458; X86-NEXT:    pfrcpit2 %mm1, %mm2
459; X86-NEXT:    movq %mm2, (%esp)
460; X86-NEXT:    flds {{[0-9]+}}(%esp)
461; X86-NEXT:    flds (%esp)
462; X86-NEXT:    movl %ebp, %esp
463; X86-NEXT:    popl %ebp
464; X86-NEXT:    retl
465;
466; X64-LABEL: test_pfrcpit2:
467; X64:       # %bb.0: # %entry
468; X64-NEXT:    movdq2q %xmm1, %mm0
469; X64-NEXT:    movdq2q %xmm0, %mm1
470; X64-NEXT:    pfrcpit2 %mm0, %mm1
471; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
472; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
473; X64-NEXT:    retq
474entry:
475  %0 = bitcast <2 x float> %a to x86_mmx
476  %1 = bitcast <2 x float> %b to x86_mmx
477  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
478  %3 = bitcast x86_mmx %2 to <2 x float>
479  ret <2 x float> %3
480}
481
482declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
483
; test_pfrsqrt: bitcasts a <2 x float> argument to x86_mmx, calls
; llvm.x86.3dnow.pfrsqrt, and returns the result as <2 x float>. Both check
; blocks expect a single `pfrsqrt`. The i686 checks assemble the operand with
; movd + punpckldq and reload the stored result with two flds; the x86_64
; checks use movdq2q on the way in and a stack slot + movaps on the way out.
484define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
485; X86-LABEL: test_pfrsqrt:
486; X86:       # %bb.0: # %entry
487; X86-NEXT:    pushl %ebp
488; X86-NEXT:    movl %esp, %ebp
489; X86-NEXT:    andl $-8, %esp
490; X86-NEXT:    subl $8, %esp
491; X86-NEXT:    movd 12(%ebp), %mm0
492; X86-NEXT:    movd 8(%ebp), %mm1
493; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
494; X86-NEXT:    pfrsqrt %mm1, %mm0
495; X86-NEXT:    movq %mm0, (%esp)
496; X86-NEXT:    flds {{[0-9]+}}(%esp)
497; X86-NEXT:    flds (%esp)
498; X86-NEXT:    movl %ebp, %esp
499; X86-NEXT:    popl %ebp
500; X86-NEXT:    retl
501;
502; X64-LABEL: test_pfrsqrt:
503; X64:       # %bb.0: # %entry
504; X64-NEXT:    movdq2q %xmm0, %mm0
505; X64-NEXT:    pfrsqrt %mm0, %mm0
506; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
507; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
508; X64-NEXT:    retq
509entry:
510  %0 = bitcast <2 x float> %a to x86_mmx
511  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
512  %2 = bitcast x86_mmx %1 to <2 x float>
513  ret <2 x float> %2
514}
515
516declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
517
; test_pfrsqit1: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfrsqit1, and returns the result as <2 x float>. Both check
; blocks expect a single `pfrsqit1` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and reload the stored result with two
; flds; the x86_64 checks use movdq2q for the operands and return via a stack
; slot and movaps into %xmm0.
518define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
519; X86-LABEL: test_pfrsqit1:
520; X86:       # %bb.0: # %entry
521; X86-NEXT:    pushl %ebp
522; X86-NEXT:    movl %esp, %ebp
523; X86-NEXT:    andl $-8, %esp
524; X86-NEXT:    subl $8, %esp
525; X86-NEXT:    movd 20(%ebp), %mm0
526; X86-NEXT:    movd 16(%ebp), %mm1
527; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
528; X86-NEXT:    movd 12(%ebp), %mm0
529; X86-NEXT:    movd 8(%ebp), %mm2
530; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
531; X86-NEXT:    pfrsqit1 %mm1, %mm2
532; X86-NEXT:    movq %mm2, (%esp)
533; X86-NEXT:    flds {{[0-9]+}}(%esp)
534; X86-NEXT:    flds (%esp)
535; X86-NEXT:    movl %ebp, %esp
536; X86-NEXT:    popl %ebp
537; X86-NEXT:    retl
538;
539; X64-LABEL: test_pfrsqit1:
540; X64:       # %bb.0: # %entry
541; X64-NEXT:    movdq2q %xmm1, %mm0
542; X64-NEXT:    movdq2q %xmm0, %mm1
543; X64-NEXT:    pfrsqit1 %mm0, %mm1
544; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
545; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
546; X64-NEXT:    retq
547entry:
548  %0 = bitcast <2 x float> %a to x86_mmx
549  %1 = bitcast <2 x float> %b to x86_mmx
550  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
551  %3 = bitcast x86_mmx %2 to <2 x float>
552  ret <2 x float> %3
553}
554
555declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
556
; test_pfsub: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfsub, and returns the result as <2 x float>. Both check
; blocks expect a single `pfsub` on MMX registers. The i686 checks build each
; operand with movd + punpckldq and reload the stored result with two flds;
; the x86_64 checks use movdq2q for the operands and return via a stack slot
; and movaps into %xmm0.
557define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
558; X86-LABEL: test_pfsub:
559; X86:       # %bb.0: # %entry
560; X86-NEXT:    pushl %ebp
561; X86-NEXT:    movl %esp, %ebp
562; X86-NEXT:    andl $-8, %esp
563; X86-NEXT:    subl $8, %esp
564; X86-NEXT:    movd 20(%ebp), %mm0
565; X86-NEXT:    movd 16(%ebp), %mm1
566; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
567; X86-NEXT:    movd 12(%ebp), %mm0
568; X86-NEXT:    movd 8(%ebp), %mm2
569; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
570; X86-NEXT:    pfsub %mm1, %mm2
571; X86-NEXT:    movq %mm2, (%esp)
572; X86-NEXT:    flds {{[0-9]+}}(%esp)
573; X86-NEXT:    flds (%esp)
574; X86-NEXT:    movl %ebp, %esp
575; X86-NEXT:    popl %ebp
576; X86-NEXT:    retl
577;
578; X64-LABEL: test_pfsub:
579; X64:       # %bb.0: # %entry
580; X64-NEXT:    movdq2q %xmm1, %mm0
581; X64-NEXT:    movdq2q %xmm0, %mm1
582; X64-NEXT:    pfsub %mm0, %mm1
583; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
584; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
585; X64-NEXT:    retq
586entry:
587  %0 = bitcast <2 x float> %a to x86_mmx
588  %1 = bitcast <2 x float> %b to x86_mmx
589  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
590  %3 = bitcast x86_mmx %2 to <2 x float>
591  ret <2 x float> %3
592}
593
594declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
595
; test_pfsubr: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnow.pfsubr, and returns the result as <2 x float>. Both check
; blocks expect a single `pfsubr` on MMX registers. The i686 checks build
; each operand with movd + punpckldq and reload the stored result with two
; flds; the x86_64 checks use movdq2q for the operands and return via a stack
; slot and movaps into %xmm0.
596define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
597; X86-LABEL: test_pfsubr:
598; X86:       # %bb.0: # %entry
599; X86-NEXT:    pushl %ebp
600; X86-NEXT:    movl %esp, %ebp
601; X86-NEXT:    andl $-8, %esp
602; X86-NEXT:    subl $8, %esp
603; X86-NEXT:    movd 20(%ebp), %mm0
604; X86-NEXT:    movd 16(%ebp), %mm1
605; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
606; X86-NEXT:    movd 12(%ebp), %mm0
607; X86-NEXT:    movd 8(%ebp), %mm2
608; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
609; X86-NEXT:    pfsubr %mm1, %mm2
610; X86-NEXT:    movq %mm2, (%esp)
611; X86-NEXT:    flds {{[0-9]+}}(%esp)
612; X86-NEXT:    flds (%esp)
613; X86-NEXT:    movl %ebp, %esp
614; X86-NEXT:    popl %ebp
615; X86-NEXT:    retl
616;
617; X64-LABEL: test_pfsubr:
618; X64:       # %bb.0: # %entry
619; X64-NEXT:    movdq2q %xmm1, %mm0
620; X64-NEXT:    movdq2q %xmm0, %mm1
621; X64-NEXT:    pfsubr %mm0, %mm1
622; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
623; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
624; X64-NEXT:    retq
625entry:
626  %0 = bitcast <2 x float> %a to x86_mmx
627  %1 = bitcast <2 x float> %b to x86_mmx
628  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
629  %3 = bitcast x86_mmx %2 to <2 x float>
630  ret <2 x float> %3
631}
632
633declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
634
; test_pi2fd: passes an x86_mmx argument to llvm.x86.3dnow.pi2fd and returns
; the result as <2 x float>. Both check blocks expect a single `pi2fd`
; applied directly to %mm0. The i686 checks reload the stored result with two
; flds; the x86_64 checks return via a stack slot and movaps into %xmm0.
635define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
636; X86-LABEL: test_pi2fd:
637; X86:       # %bb.0: # %entry
638; X86-NEXT:    pushl %ebp
639; X86-NEXT:    movl %esp, %ebp
640; X86-NEXT:    andl $-8, %esp
641; X86-NEXT:    subl $8, %esp
642; X86-NEXT:    pi2fd %mm0, %mm0
643; X86-NEXT:    movq %mm0, (%esp)
644; X86-NEXT:    flds {{[0-9]+}}(%esp)
645; X86-NEXT:    flds (%esp)
646; X86-NEXT:    movl %ebp, %esp
647; X86-NEXT:    popl %ebp
648; X86-NEXT:    retl
649;
650; X64-LABEL: test_pi2fd:
651; X64:       # %bb.0: # %entry
652; X64-NEXT:    pi2fd %mm0, %mm0
653; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
654; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
655; X64-NEXT:    retq
656entry:
; %0/%1 bitcast the argument to <2 x i32> and straight back to x86_mmx, so
; the intrinsic receives the same bits as the incoming argument.
657  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
658  %1 = bitcast <2 x i32> %0 to x86_mmx
659  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
660  %3 = bitcast x86_mmx %2 to <2 x float>
661  ret <2 x float> %3
662}
663
664declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
665
; test_pmulhrw: passes two x86_mmx arguments to llvm.x86.3dnow.pmulhrw and
; returns the result as <4 x i16>. Both check blocks expect one `pmulhrw`
; applied directly to %mm1/%mm0. The i686 checks then store %mm0 through a
; pointer loaded from the stack and return with `retl $4`; the x86_64 checks
; move the result into %xmm0 with movq2dq.
666define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
667; X86-LABEL: test_pmulhrw:
668; X86:       # %bb.0: # %entry
669; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
670; X86-NEXT:    pmulhrw %mm1, %mm0
671; X86-NEXT:    movq %mm0, (%eax)
672; X86-NEXT:    retl $4
673;
674; X64-LABEL: test_pmulhrw:
675; X64:       # %bb.0: # %entry
676; X64-NEXT:    pmulhrw %mm1, %mm0
677; X64-NEXT:    movq2dq %mm0, %xmm0
678; X64-NEXT:    retq
679entry:
; %0-%3 bitcast each argument to <4 x i16> and straight back to x86_mmx, so
; the intrinsic receives the same bits as the incoming arguments.
680  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
681  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
682  %2 = bitcast <4 x i16> %0 to x86_mmx
683  %3 = bitcast <4 x i16> %1 to x86_mmx
684  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
685  %5 = bitcast x86_mmx %4 to <4 x i16>
686  ret <4 x i16> %5
687}
688
689declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
690
; test_pf2iw: bitcasts a <2 x float> argument to x86_mmx, calls
; llvm.x86.3dnowa.pf2iw (note the "3dnowa" intrinsic namespace), and returns
; the result as <2 x i32>. Both check blocks expect a single `pf2iw`. The
; i686 checks assemble the operand with movd + punpckldq and return the two
; lanes in %eax/%edx via a stack slot; the x86_64 checks use movdq2q on the
; way in and movq2dq on the way out.
691define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
692; X86-LABEL: test_pf2iw:
693; X86:       # %bb.0: # %entry
694; X86-NEXT:    pushl %ebp
695; X86-NEXT:    movl %esp, %ebp
696; X86-NEXT:    andl $-8, %esp
697; X86-NEXT:    subl $8, %esp
698; X86-NEXT:    movd 12(%ebp), %mm0
699; X86-NEXT:    movd 8(%ebp), %mm1
700; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
701; X86-NEXT:    pf2iw %mm1, %mm0
702; X86-NEXT:    movq %mm0, (%esp)
703; X86-NEXT:    movl (%esp), %eax
704; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
705; X86-NEXT:    movl %ebp, %esp
706; X86-NEXT:    popl %ebp
707; X86-NEXT:    retl
708;
709; X64-LABEL: test_pf2iw:
710; X64:       # %bb.0: # %entry
711; X64-NEXT:    movdq2q %xmm0, %mm0
712; X64-NEXT:    pf2iw %mm0, %mm0
713; X64-NEXT:    movq2dq %mm0, %xmm0
714; X64-NEXT:    retq
715entry:
716  %0 = bitcast <2 x float> %a to x86_mmx
717  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
718  %2 = bitcast x86_mmx %1 to <2 x i32>
719  ret <2 x i32> %2
720}
721
722declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
723
; test_pfnacc: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnowa.pfnacc (note the "3dnowa" intrinsic namespace), and returns
; the result as <2 x float>. Both check blocks expect a single `pfnacc` on
; MMX registers. The i686 checks build each operand with movd + punpckldq and
; reload the stored result with two flds; the x86_64 checks use movdq2q for
; the operands and return via a stack slot and movaps into %xmm0.
724define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
725; X86-LABEL: test_pfnacc:
726; X86:       # %bb.0: # %entry
727; X86-NEXT:    pushl %ebp
728; X86-NEXT:    movl %esp, %ebp
729; X86-NEXT:    andl $-8, %esp
730; X86-NEXT:    subl $8, %esp
731; X86-NEXT:    movd 20(%ebp), %mm0
732; X86-NEXT:    movd 16(%ebp), %mm1
733; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
734; X86-NEXT:    movd 12(%ebp), %mm0
735; X86-NEXT:    movd 8(%ebp), %mm2
736; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
737; X86-NEXT:    pfnacc %mm1, %mm2
738; X86-NEXT:    movq %mm2, (%esp)
739; X86-NEXT:    flds {{[0-9]+}}(%esp)
740; X86-NEXT:    flds (%esp)
741; X86-NEXT:    movl %ebp, %esp
742; X86-NEXT:    popl %ebp
743; X86-NEXT:    retl
744;
745; X64-LABEL: test_pfnacc:
746; X64:       # %bb.0: # %entry
747; X64-NEXT:    movdq2q %xmm1, %mm0
748; X64-NEXT:    movdq2q %xmm0, %mm1
749; X64-NEXT:    pfnacc %mm0, %mm1
750; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
751; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
752; X64-NEXT:    retq
753entry:
754  %0 = bitcast <2 x float> %a to x86_mmx
755  %1 = bitcast <2 x float> %b to x86_mmx
756  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
757  %3 = bitcast x86_mmx %2 to <2 x float>
758  ret <2 x float> %3
759}
760
761declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
762
; test_pfpnacc: bitcasts two <2 x float> arguments to x86_mmx, calls
; llvm.x86.3dnowa.pfpnacc (note the "3dnowa" intrinsic namespace), and
; returns the result as <2 x float>. Both check blocks expect a single
; `pfpnacc` on MMX registers. The i686 checks build each operand with
; movd + punpckldq and reload the stored result with two flds; the x86_64
; checks use movdq2q for the operands and return via a stack slot and movaps
; into %xmm0.
763define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
764; X86-LABEL: test_pfpnacc:
765; X86:       # %bb.0: # %entry
766; X86-NEXT:    pushl %ebp
767; X86-NEXT:    movl %esp, %ebp
768; X86-NEXT:    andl $-8, %esp
769; X86-NEXT:    subl $8, %esp
770; X86-NEXT:    movd 20(%ebp), %mm0
771; X86-NEXT:    movd 16(%ebp), %mm1
772; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
773; X86-NEXT:    movd 12(%ebp), %mm0
774; X86-NEXT:    movd 8(%ebp), %mm2
775; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
776; X86-NEXT:    pfpnacc %mm1, %mm2
777; X86-NEXT:    movq %mm2, (%esp)
778; X86-NEXT:    flds {{[0-9]+}}(%esp)
779; X86-NEXT:    flds (%esp)
780; X86-NEXT:    movl %ebp, %esp
781; X86-NEXT:    popl %ebp
782; X86-NEXT:    retl
783;
784; X64-LABEL: test_pfpnacc:
785; X64:       # %bb.0: # %entry
786; X64-NEXT:    movdq2q %xmm1, %mm0
787; X64-NEXT:    movdq2q %xmm0, %mm1
788; X64-NEXT:    pfpnacc %mm0, %mm1
789; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
790; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
791; X64-NEXT:    retq
792entry:
793  %0 = bitcast <2 x float> %a to x86_mmx
794  %1 = bitcast <2 x float> %b to x86_mmx
795  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
796  %3 = bitcast x86_mmx %2 to <2 x float>
797  ret <2 x float> %3
798}
799
800declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
801
; test_pi2fw: passes an x86_mmx argument to llvm.x86.3dnowa.pi2fw (note the
; "3dnowa" intrinsic namespace) and returns the result as <2 x float>. Both
; check blocks expect a single `pi2fw` applied directly to %mm0. The i686
; checks reload the stored result with two flds; the x86_64 checks return via
; a stack slot and movaps into %xmm0.
802define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
803; X86-LABEL: test_pi2fw:
804; X86:       # %bb.0: # %entry
805; X86-NEXT:    pushl %ebp
806; X86-NEXT:    movl %esp, %ebp
807; X86-NEXT:    andl $-8, %esp
808; X86-NEXT:    subl $8, %esp
809; X86-NEXT:    pi2fw %mm0, %mm0
810; X86-NEXT:    movq %mm0, (%esp)
811; X86-NEXT:    flds {{[0-9]+}}(%esp)
812; X86-NEXT:    flds (%esp)
813; X86-NEXT:    movl %ebp, %esp
814; X86-NEXT:    popl %ebp
815; X86-NEXT:    retl
816;
817; X64-LABEL: test_pi2fw:
818; X64:       # %bb.0: # %entry
819; X64-NEXT:    pi2fw %mm0, %mm0
820; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
821; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
822; X64-NEXT:    retq
823entry:
; %0/%1 bitcast the argument to <2 x i32> and straight back to x86_mmx, so
; the intrinsic receives the same bits as the incoming argument.
824  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
825  %1 = bitcast <2 x i32> %0 to x86_mmx
826  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
827  %3 = bitcast x86_mmx %2 to <2 x float>
828  ret <2 x float> %3
829}
830
831declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
832
; test_pswapdsf: bitcasts a <2 x float> argument to x86_mmx, calls
; llvm.x86.3dnowa.pswapd, and returns the result as <2 x float>. Both check
; blocks expect a single `pswapd`, whose asm comment shows the [1,0] lane
; order. The i686 checks assemble the operand with movd + punpckldq and
; reload the stored result with two flds; the x86_64 checks use movdq2q on
; the way in and a stack slot + movaps on the way out.
833define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
834; X86-LABEL: test_pswapdsf:
835; X86:       # %bb.0: # %entry
836; X86-NEXT:    pushl %ebp
837; X86-NEXT:    movl %esp, %ebp
838; X86-NEXT:    andl $-8, %esp
839; X86-NEXT:    subl $8, %esp
840; X86-NEXT:    movd 12(%ebp), %mm0
841; X86-NEXT:    movd 8(%ebp), %mm1
842; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
843; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
844; X86-NEXT:    movq %mm0, (%esp)
845; X86-NEXT:    flds {{[0-9]+}}(%esp)
846; X86-NEXT:    flds (%esp)
847; X86-NEXT:    movl %ebp, %esp
848; X86-NEXT:    popl %ebp
849; X86-NEXT:    retl
850;
851; X64-LABEL: test_pswapdsf:
852; X64:       # %bb.0: # %entry
853; X64-NEXT:    movdq2q %xmm0, %mm0
854; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
855; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
856; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
857; X64-NEXT:    retq
858entry:
859  %0 = bitcast <2 x float> %a to x86_mmx
860  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
861  %2 = bitcast x86_mmx %1 to <2 x float>
862  ret <2 x float> %2
863}
864
; test_pswapdsi: same llvm.x86.3dnowa.pswapd call as test_pswapdsf, but with
; a <2 x i32> argument and return type. Both check blocks expect a single
; `pswapd`, whose asm comment shows the [1,0] lane order. The i686 checks
; assemble the operand with movd + punpckldq and return the two lanes in
; %eax/%edx via a stack slot; the x86_64 checks use movdq2q on the way in and
; movq2dq on the way out.
865define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
866; X86-LABEL: test_pswapdsi:
867; X86:       # %bb.0: # %entry
868; X86-NEXT:    pushl %ebp
869; X86-NEXT:    movl %esp, %ebp
870; X86-NEXT:    andl $-8, %esp
871; X86-NEXT:    subl $8, %esp
872; X86-NEXT:    movd 12(%ebp), %mm0
873; X86-NEXT:    movd 8(%ebp), %mm1
874; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
875; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
876; X86-NEXT:    movq %mm0, (%esp)
877; X86-NEXT:    movl (%esp), %eax
878; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
879; X86-NEXT:    movl %ebp, %esp
880; X86-NEXT:    popl %ebp
881; X86-NEXT:    retl
882;
883; X64-LABEL: test_pswapdsi:
884; X64:       # %bb.0: # %entry
885; X64-NEXT:    movdq2q %xmm0, %mm0
886; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
887; X64-NEXT:    movq2dq %mm0, %xmm0
888; X64-NEXT:    retq
889entry:
890  %0 = bitcast <2 x i32> %a to x86_mmx
891  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
892  %2 = bitcast x86_mmx %1 to <2 x i32>
893  ret <2 x i32> %2
894}
895
896declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
897