; Tests selection between the A-form and M-form VSX fused multiply-add
; instructions (scalar xsmadd[am]dp and vector xvmadd[am]dp) when several FMAs
; share operands. The first invocation verifies the default code generator,
; the second verifies fast instruction selection at -O0.
; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s

; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
; live-interval-updating logic.
; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; test1: two scalar FMAs, b*c + a and b*e + a, that share the multiplicand %b
; and the addend %a. The results are stored to d[0] and d[1].
;
; The default-prefix expectations require one M-form FMA (xsmaddmdp, whose
; target register is also a multiplicand) and one A-form FMA (xsmaddadp, whose
; target register is also the addend). The fast-isel expectations require a
; register copy (fmr) of the addend plus two A-form FMAs.
define void @test1(double %a, double %b, double %c, double %e, double* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  ; d[1] = b*e + a
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %arrayidx1 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx1, align 8
  ret void

; CHECK-LABEL: @test1
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: xsmaddmdp 3, 2, 1
; CHECK-DAG: xsmaddadp 1, 2, 4
; CHECK-DAG: stxsdx 3, 0, 7
; CHECK-DAG: stxsdx 1, 7, [[C1]]
; CHECK: blr

; CHECK-FISL-LABEL: @test1
; CHECK-FISL-DAG: fmr 0, 1
; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
; CHECK-FISL-DAG: stxsdx 0, 0, 7
; CHECK-FISL-DAG: xsmaddadp 1, 2, 4
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 1, 7, [[C1]]
; CHECK-FISL: blr
}

; test2: three scalar FMAs, b*c + a, b*e + a and b*f + a, all sharing the
; multiplicand %b and the addend %a. The results are stored to d[0], d[1] and
; d[2].
;
; The default-prefix expectations require two M-form FMAs (xsmaddmdp) and one
; A-form FMA (xsmaddadp). The fast-isel expectations require register copies
; (fmr) of the addend and A-form FMAs.
define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  ; d[1] = b*e + a
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %arrayidx1 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx1, align 8
  ; d[2] = b*f + a
  %2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  %arrayidx2 = getelementptr inbounds double* %d, i64 2
  store double %2, double* %arrayidx2, align 8
  ret void

; CHECK-LABEL: @test2
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: xsmaddmdp 3, 2, 1
; CHECK-DAG: xsmaddmdp 4, 2, 1
; CHECK-DAG: xsmaddadp 1, 2, 5
; CHECK-DAG: stxsdx 3, 0, 8
; CHECK-DAG: stxsdx 4, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @test2
; CHECK-FISL-DAG: fmr 0, 1
; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
; CHECK-FISL-DAG: stxsdx 0, 0, 8
; CHECK-FISL-DAG: fmr 0, 1
; CHECK-FISL-DAG: xsmaddadp 0, 2, 4
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; CHECK-FISL: blr
}

; test3: four scalar FMAs where one result feeds another. %1 = b*e + a is
; used as the addend of %2 = b*c + %1 and is itself stored last (to d[1]), so
; it stays live across the later FMAs and stores.
;
; Stores: d[0] = b*c + a, d[3] = b*c + %1, d[2] = b*f + a, d[1] = %1.
define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  ; %1 = b*e + a (stored at the end); d[3] = b*c + %1
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
  %arrayidx1 = getelementptr inbounds double* %d, i64 3
  store double %2, double* %arrayidx1, align 8
  ; d[2] = b*f + a
  %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  %arrayidx2 = getelementptr inbounds double* %d, i64 2
  store double %3, double* %arrayidx2, align 8
  ; d[1] = %1
  %arrayidx3 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx3, align 8
  ret void

; CHECK-LABEL: @test3
; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 24
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: li [[C3:[0-9]+]], 8
; CHECK-DAG: xsmaddmdp 4, 2, 1
; CHECK-DAG: xsmaddadp 1, 2, 5

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xsmaddadp [[F1]], 2, 3

; CHECK-DAG: xsmaddmdp 2, 3, 4
; CHECK-DAG: stxsdx [[F1]], 0, 8
; CHECK-DAG: stxsdx 2, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK-DAG: stxsdx 4, 8, [[C3]]
; CHECK: blr

; CHECK-FISL-LABEL: @test3
; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
; CHECK-FISL-DAG: fmr 4, [[F1]]
; CHECK-FISL-DAG: xsmaddadp 4, 2, 3
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
; CHECK-FISL-DAG: stxsdx 4, 8, [[C1]]
; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
; CHECK-FISL: blr
}

; test4: the same four FMAs as test3, but %1 = b*e + a is stored to d[1]
; immediately after it is computed, before it is used as the addend of
; %2 = b*c + %1.
;
; Stores: d[0] = b*c + a, d[1] = %1, d[3] = b*c + %1, d[2] = b*f + a.
define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  ; d[1] = %1 = b*e + a
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %arrayidx1 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx1, align 8
  ; d[3] = b*c + %1
  %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
  %arrayidx3 = getelementptr inbounds double* %d, i64 3
  store double %2, double* %arrayidx3, align 8
  ; d[2] = b*f + a
  %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  %arrayidx4 = getelementptr inbounds double* %d, i64 2
  store double %3, double* %arrayidx4, align 8
  ret void

; CHECK-LABEL: @test4
; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: xsmaddmdp 4, 2, 1

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xsmaddadp 1, 2, 5

; CHECK-DAG: xsmaddadp [[F1]], 2, 3
; CHECK-DAG: stxsdx [[F1]], 0, 8
; CHECK-DAG: stxsdx 4, 8, [[C1]]
; CHECK-DAG: li [[C3:[0-9]+]], 24
; CHECK-DAG: xsmaddadp 4, 2, 3
; CHECK-DAG: stxsdx 4, 8, [[C3]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @test4
; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 3
; CHECK-FISL-DAG: stxsdx 0, 0, 8
; CHECK-FISL-DAG: fmr [[F1]], 1
; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; CHECK-FISL: blr
}

; Scalar double-precision fused multiply-add intrinsic used by test1-test4;
; the calls above pass (multiplicand, multiplicand, addend).
declare double @llvm.fma.f64(double, double, double) #0

; testv1: vector (<2 x double>) counterpart of test1. Two FMAs, b*c + a and
; b*e + a, share the multiplicand %b and the addend %a; the results are stored
; to d[0] and d[1].
;
; The default-prefix expectations require one M-form FMA (xvmaddmdp) and one
; A-form FMA (xvmaddadp). The fast-isel expectations require a register copy
; (xxlor) of the addend plus two A-form FMAs.
;
; In both expectation groups the indexed store must name the same register
; that the preceding li loaded with the offset 16, so the store line uses the
; captured variable rather than capturing it a second time.
define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  ; d[1] = b*e + a
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
  ret void

; CHECK-LABEL: @testv1
; CHECK-DAG: xvmaddmdp 36, 35, 34
; CHECK-DAG: xvmaddadp 34, 35, 37
; CHECK-DAG: li [[C1:[0-9]+]], 16
; CHECK-DAG: stxvd2x 36, 0, 3
; CHECK-DAG: stxvd2x 34, 3, [[C1]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv1
; CHECK-FISL-DAG: xxlor 0, 34, 34
; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
; CHECK-FISL-DAG: stxvd2x 0, 0, 3
; CHECK-FISL-DAG: xvmaddadp 34, 35, 37
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C1]]
; CHECK-FISL: blr
}

; testv2: vector (<2 x double>) counterpart of test2. Three FMAs, b*c + a,
; b*e + a and b*f + a, share the multiplicand %b and the addend %a; the
; results are stored to d[0], d[1] and d[2].
;
; The default-prefix expectations require two M-form FMAs (xvmaddmdp) and one
; A-form FMA (xvmaddadp). The fast-isel expectations require register copies
; (xxlor) of the addend and A-form FMAs.
;
; In both expectation groups each indexed store must name the same register
; that the corresponding li loaded with the offset (16 or 32), so the store
; lines use the captured variables rather than capturing them a second time.
define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  ; d[1] = b*e + a
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
  ; d[2] = b*f + a
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
  %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
  store <2 x double> %2, <2 x double>* %arrayidx2, align 8
  ret void

; CHECK-LABEL: @testv2
; CHECK-DAG: xvmaddmdp 36, 35, 34
; CHECK-DAG: xvmaddmdp 37, 35, 34
; CHECK-DAG: li [[C1:[0-9]+]], 16
; CHECK-DAG: li [[C2:[0-9]+]], 32
; CHECK-DAG: xvmaddadp 34, 35, 38
; CHECK-DAG: stxvd2x 36, 0, 3
; CHECK-DAG: stxvd2x 37, 3, [[C1]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv2
; CHECK-FISL-DAG: xxlor 0, 34, 34
; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
; CHECK-FISL-DAG: stxvd2x 0, 0, 3
; CHECK-FISL-DAG: xxlor 0, 34, 34
; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-FISL: blr
}

; testv3: vector (<2 x double>) counterpart of test3. %1 = b*e + a is used as
; the addend of %2 = b*c + %1 and is itself stored last (to d[1]), so it stays
; live across the later FMAs and stores.
;
; Stores: d[0] = b*c + a, d[3] = b*c + %1, d[2] = b*f + a, d[1] = %1.
define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  ; %1 = b*e + a (stored at the end); d[3] = b*c + %1
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
  store <2 x double> %2, <2 x double>* %arrayidx1, align 8
  ; d[2] = b*f + a
  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
  %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
  store <2 x double> %3, <2 x double>* %arrayidx2, align 8
  ; d[1] = %1
  %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx3, align 8
  ret void

; Note: There is some unavoidable changeability in this variant.  If the
; FMAs are reordered differently, the algorithm can pick a different
; multiplicand to destroy, changing the register assignment.  There isn't
; a good way to express this possibility, so hopefully this doesn't change
; too often.

; CHECK-LABEL: @testv3
; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-DAG: li [[C1:[0-9]+]], 48
; CHECK-DAG: li [[C2:[0-9]+]], 32
; CHECK-DAG: xvmaddmdp 37, 35, 34
; CHECK-DAG: li [[C3:[0-9]+]], 16

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xvmaddadp [[V1]], 35, 36

; CHECK-DAG: xvmaddmdp 36, 35, 37
; CHECK-DAG: xvmaddadp 34, 35, 38
; CHECK-DAG: stxvd2x 32, 0, 3
; CHECK-DAG: stxvd2x 36, 3, [[C1]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv3
; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
; CHECK-FISL-DAG: stxvd2x [[V1]], 0, 3
; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
; CHECK-FISL-DAG: xxlor [[V3:[0-9]+]], 0, 0
; CHECK-FISL-DAG: xvmaddadp [[V3]], 35, 36
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 48
; CHECK-FISL-DAG: stxvd2x [[V3]], 3, [[C1]]
; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
; CHECK-FISL: blr
}

; testv4: vector (<2 x double>) counterpart of test4. The same four FMAs as
; testv3, but %1 = b*e + a is stored to d[1] immediately after it is computed,
; before it is used as the addend of %2 = b*c + %1.
;
; Stores: d[0] = b*c + a, d[1] = %1, d[3] = b*c + %1, d[2] = b*f + a.
define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
entry:
  ; d[0] = b*c + a
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  ; d[1] = %1 = b*e + a
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
  ; d[3] = b*c + %1
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
  %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
  store <2 x double> %2, <2 x double>* %arrayidx3, align 8
  ; d[2] = b*f + a
  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
  %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
  store <2 x double> %3, <2 x double>* %arrayidx4, align 8
  ret void

; CHECK-LABEL: @testv4
; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-DAG: xvmaddmdp 37, 35, 34
; CHECK-DAG: li [[C1:[0-9]+]], 16
; CHECK-DAG: li [[C2:[0-9]+]], 32
; CHECK-DAG: xvmaddadp 34, 35, 38

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xvmaddadp [[V1]], 35, 36

; CHECK-DAG: stxvd2x 32, 0, 3
; CHECK-DAG: stxvd2x 37, 3, [[C1]]
; CHECK-DAG: li [[C3:[0-9]+]], 48
; CHECK-DAG: xvmaddadp 37, 35, 36
; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv4
; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
; CHECK-FISL-DAG: stxvd2x 0, 0, 3
; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 48
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-FISL: blr
}

; Vector (<2 x double>) fused multiply-add intrinsic used by testv1-testv4;
; the calls above pass (multiplicand, multiplicand, addend).
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0

; Attribute group #0, referenced by every function and intrinsic declaration
; in this file.
attributes #0 = { nounwind readnone }

353