; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64

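; PFADD is commutative, so the operands of the second pfadd can be swapped to fold the load from %a2.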
define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfadd (%eax), %mm0
; X32-NEXT:    pfadd (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfadd:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfadd (%rsi), %mm0
; X64-NEXT:    pfadd (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)

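; PFSUB isn't commutative, but swapping the operands of the second pfsub turns it into pfsubr, so the load from %a2 can still be folded.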
define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsub (%eax), %mm0
; X32-NEXT:    pfsubr (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsub:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsub (%rsi), %mm0
; X64-NEXT:    pfsubr (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)

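; Likewise, commuting the second pfsubr turns it into pfsub, still folding the load from %a2.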
define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsubr (%eax), %mm0
; X32-NEXT:    pfsub (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsubr:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsubr (%rsi), %mm0
; X64-NEXT:    pfsub (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx)

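; PFMUL is commutative, so the load from %a2 folds into the second pfmul.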
define void @commute_m_pfmul(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmul:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfmul (%eax), %mm0
; X32-NEXT:    pfmul (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmul:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfmul (%rsi), %mm0
; X64-NEXT:    pfmul (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx)

; PFMAX can't commute without fast-math.
define void @commute_m_pfmax(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmax:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmax (%eax), %mm0
; X32-NEXT:    pfmax %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmax:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmax (%rsi), %mm0
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx)

; PFMIN can't commute without fast-math.
define void @commute_m_pfmin(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmin:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmin (%eax), %mm0
; X32-NEXT:    pfmin %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmin:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmin (%rsi), %mm0
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx)

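; PFCMPEQ is commutative, so the load from %a2 folds into the second pfcmpeq.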
define void @commute_m_pfcmpeq(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfcmpeq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfcmpeq (%eax), %mm0
; X32-NEXT:    pfcmpeq (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfcmpeq (%rsi), %mm0
; X64-NEXT:    pfcmpeq (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx)

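; PAVGUSB is commutative, so the load from %a2 folds into the second pavgusb.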
define void @commute_m_pavgusb(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pavgusb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pavgusb (%eax), %mm0
; X32-NEXT:    pavgusb (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pavgusb:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pavgusb (%rsi), %mm0
; X64-NEXT:    pavgusb (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx)

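; PMULHRW is commutative, so the load from %a2 folds into the second pmulhrw.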
define void @commute_m_pmulhrw(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pmulhrw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pmulhrw (%eax), %mm0
; X32-NEXT:    pmulhrw (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pmulhrw (%rsi), %mm0
; X64-NEXT:    pmulhrw (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx)