; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-PWR
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 < %s | FileCheck  %s -check-prefix=FIXPOINT
; The pwr7 output is matched with the two floating-point check prefixes; the
; pwr9 output is matched only with the integer-multiply prefix.
; Target: big-endian ("E") 64-bit PowerPC Linux.
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

; Serial chain ((x0 + x1) + x2) + x3 built from fadds flagged reassoc+nsz.
; The expected code adds registers 1,2 and 3,4 separately, then sums the two
; partial results into register 1 immediately before the return.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}

; Same sum as a serial chain of reassoc+nsz fadds, but the second add has its
; operands commuted (x2 + t0). The expected code is unchanged: two adds on
; registers 1,2 and 3,4, then one add of the partial results into register 1.
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}

; Serial chain of reassoc+nsz fadds where the third add has its operands
; commuted (x3 + t1). The expected code is still two adds on registers 1,2 and
; 3,4 followed by one add of the partial results into register 1.
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

; Serial chain of reassoc+nsz fadds where both the second and the third add
; have their operands commuted. The expected code is still two adds on
; registers 1,2 and 3,4 followed by one add of the partial results into
; register 1.
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

; Eight inputs summed by a serial chain of seven reassoc+nsz fadds.
; The first six expected adds may appear in any order (DAG matching): pairwise
; adds of registers 5,6 / 1,2 / 3,4, then (5+6)+7, (1+2)+(3+4), and the sum of
; those two. The last add folds in register 8, writes register 1, and must be
; immediately followed by the return.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK:       # %bb.0:
; CHECK-DAG:   fadds [[REG12:[0-9]+]], 5, 6
; CHECK-DAG:   fadds [[REG0:[0-9]+]], 1, 2
; CHECK-DAG:   fadds [[REG11:[0-9]+]], 3, 4
; CHECK-DAG:   fadds [[REG13:[0-9]+]], [[REG12]], 7
; CHECK-DAG:   fadds [[REG1:[0-9]+]], [[REG0]], [[REG11]]
; CHECK-DAG:   fadds [[REG2:[0-9]+]], [[REG1]], [[REG13]]
; CHECK:       fadds 1, [[REG2]], 8
; CHECK-NEXT:    blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  %t3 = fadd reassoc nsz float %t2, %x4
  %t4 = fadd reassoc nsz float %t3, %x5
  %t5 = fadd reassoc nsz float %t4, %x6
  %t6 = fadd reassoc nsz float %t5, %x7
  ret float %t6
}

; Verify that we reassociate vector instructions too.

; <4 x float> version of the serial chain ((x0 + x1) + x2) + x3 with
; reassoc+nsz fadds. The expected code adds registers 34,35 and 36,37
; separately, then sums the partial results into register 34 immediately
; before the return.
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds1:
; CHECK:       # %bb.0:
; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
  ret <4 x float> %t2
}

; <4 x float> serial chain of reassoc+nsz fadds with the second add's operands
; commuted (x2 + t0). The expected code is unchanged: adds of registers 34,35
; and 36,37, then one add of the partial results into register 34.
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds2:
; CHECK:       # %bb.0:
; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
  ret <4 x float> %t2
}

; <4 x float> serial chain of reassoc+nsz fadds with the third add's operands
; commuted (x3 + t1). The expected code is still adds of registers 34,35 and
; 36,37 followed by one add of the partial results into register 34.
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK:       # %bb.0:
; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; <4 x float> serial chain of reassoc+nsz fadds with both the second and the
; third add commuted. The expected code is still adds of registers 34,35 and
; 36,37 followed by one add of the partial results into register 34.
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK:       # %bb.0:
; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; x3 + (x2 + (x0 / x1)) in single precision. Unlike the reassociate_adds1-5
; functions, the fadd instructions here carry no fast-math flags.
; No FileCheck patterns are attached to this function, so it is only compiled
; (both llc invocations pass -verify-machineinstrs).
; NOTE(review): presumably a compile-only / no-reassociation case - confirm
; whether output patterns were intended here.
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; x3 * (x2 * (x0 / x1)) in single precision; the fmul instructions carry no
; fast-math flags.
; No FileCheck patterns are attached to this function, so it is only compiled
; (both llc invocations pass -verify-machineinstrs).
; NOTE(review): presumably a compile-only / no-reassociation case - confirm
; whether output patterns were intended here.
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; x3 + (x2 + (x0 / x1)) in double precision; the fadd instructions carry no
; fast-math flags.
; No FileCheck patterns are attached to this function, so it is only compiled
; (both llc invocations pass -verify-machineinstrs).
; NOTE(review): presumably a compile-only / no-reassociation case - confirm
; whether output patterns were intended here.
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; x3 * (x2 * (x0 / x1)) in double precision; the fmul instructions carry no
; fast-math flags.
; No FileCheck patterns are attached to this function, so it is only compiled
; (both llc invocations pass -verify-machineinstrs).
; NOTE(review): presumably a compile-only / no-reassociation case - confirm
; whether output patterns were intended here.
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Serial chain ((x0 * x1) * x2) * x3 of i32 multiplies. Only the pwr9
; invocation has patterns for this function: two multiplies on registers 3,4
; and 5,6, then one multiply of the partial products into register 3
; immediately before the return.
define i32 @reassociate_mullw(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; FIXPOINT-LABEL: reassociate_mullw:
; FIXPOINT:       # %bb.0:
; FIXPOINT:       mullw [[REG0:[0-9]+]], 3, 4
; FIXPOINT:       mullw [[REG1:[0-9]+]], 5, 6
; FIXPOINT:       mullw 3, [[REG0]], [[REG1]]
; FIXPOINT-NEXT:  blr

  %t0 = mul i32 %x0, %x1
  %t1 = mul i32 %t0, %x2
  %t2 = mul i32 %t1, %x3
  ret i32 %t2
}

; Serial chain ((x0 * x1) * x2) * x3 of i64 multiplies. Only the pwr9
; invocation has patterns for this function: two multiplies on registers 3,4
; and 5,6, then one multiply of the partial products into register 3
; immediately before the return.
define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; FIXPOINT-LABEL: reassociate_mulld:
; FIXPOINT:       # %bb.0:
; FIXPOINT:       mulld [[REG0:[0-9]+]], 3, 4
; FIXPOINT:       mulld [[REG1:[0-9]+]], 5, 6
; FIXPOINT:       mulld 3, [[REG0]], [[REG1]]
; FIXPOINT-NEXT:  blr

  %t0 = mul i64 %x0, %x1
  %t1 = mul i64 %t0, %x2
  %t2 = mul i64 %t1, %x3
  ret i64 %t2
}

; Computes ((%1 + %0) + %3 * %2) + %5 * %4 in double precision, every
; operation flagged reassoc+nsz. The expected code is two multiply-adds
; accumulating into registers 1 and 2 (either order), then one add of those
; registers into register 1 immediately before the return.
define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
; CHECK-LABEL: reassociate_mamaa_double:
; CHECK:       # %bb.0:
; CHECK-PWR-DAG:   xsmaddadp 1, 6, 5
; CHECK-PWR-DAG:   xsmaddadp 2, 4, 3
; CHECK-PWR:       xsadddp 1, 2, 1
; CHECK-NEXT:  blr
  %7 = fmul reassoc nsz double %3, %2
  %8 = fmul reassoc nsz double %5, %4
  %9 = fadd reassoc nsz double %1, %0
  %10 = fadd reassoc nsz double %9, %7
  %11 = fadd reassoc nsz double %10, %8
  ret double %11
}

; Single-precision version: ((%1 + %0) + %3 * %2) + %5 * %4, every operation
; flagged reassoc+nsz. The expected code is two fmadds (either order), one
; with operands 4, 3, 2 and one with operands 6, 5, 1, then an fadds of their
; results into register 1 immediately before the return.
define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, float %4, float %5) {
; CHECK-LABEL: reassociate_mamaa_float:
; CHECK:       # %bb.0:
; CHECK-DAG:   fmadds [[REG0:[0-9]+]], 4, 3, 2
; CHECK-DAG:   fmadds [[REG1:[0-9]+]], 6, 5, 1
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr
  %7 = fmul reassoc nsz float %3, %2
  %8 = fmul reassoc nsz float %5, %4
  %9 = fadd reassoc nsz float %1, %0
  %10 = fadd reassoc nsz float %9, %7
  %11 = fadd reassoc nsz float %10, %8
  ret float %11
}

; <4 x float> version: ((%1 + %0) + %3 * %2) + %5 * %4, every operation
; flagged reassoc+nsz. The expected code is two vector multiply-adds (either
; order), one with sources 39, 38 and one with sources 37, 36, then a vector
; add of their results into register 34 immediately before the return.
define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
; CHECK-LABEL: reassociate_mamaa_vec:
; CHECK:       # %bb.0:
; CHECK-PWR-DAG:   xvmaddasp [[REG0:[0-9]+]], 39, 38
; CHECK-PWR-DAG:   xvmaddasp [[REG1:[0-9]+]], 37, 36
; CHECK-PWR:       xvaddsp 34, [[REG1]], [[REG0]]
; CHECK-NEXT:  blr
  %7 = fmul reassoc nsz <4 x float> %3, %2
  %8 = fmul reassoc nsz <4 x float> %5, %4
  %9 = fadd reassoc nsz <4 x float> %1, %0
  %10 = fadd reassoc nsz <4 x float> %9, %7
  %11 = fadd reassoc nsz <4 x float> %10, %8
  ret <4 x float> %11
}

; Computes (((%3 * %2 + %1 * %0) + %6) + %5 * %4) + %8 * %7 in double
; precision, every operation flagged reassoc+nsz.
; The expected code starts with a multiply-add into register 7 (sources 2, 1).
; Then, in any order: a plain multiply of 4, 3 into a scratch register, a
; second multiply-add into register 7 (sources 6, 5), and a multiply-add into
; the scratch register (sources 9, 8). A final add of register 7 and the
; scratch register writes register 1 immediately before the return.
define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
; CHECK-LABEL: reassociate_mamama_double:
; CHECK:       # %bb.0:
; CHECK-PWR:       xsmaddadp 7, 2, 1
; CHECK-PWR-DAG:   xsmuldp [[REG0:[0-9]+]], 4, 3
; CHECK-PWR-DAG:   xsmaddadp 7, 6, 5
; CHECK-PWR-DAG:   xsmaddadp [[REG0]], 9, 8
; CHECK-PWR:       xsadddp 1, 7, [[REG0]]
; CHECK-NEXT:  blr
  %10 = fmul reassoc nsz double %1, %0
  %11 = fmul reassoc nsz double %3, %2
  %12 = fmul reassoc nsz double %5, %4
  %13 = fmul reassoc nsz double %8, %7
  %14 = fadd reassoc nsz double %11, %10
  %15 = fadd reassoc nsz double %14, %6
  %16 = fadd reassoc nsz double %15, %12
  %17 = fadd reassoc nsz double %16, %13
  ret double %17
}

; Seventeen float arguments: %0 plus eight products (%2*%1, %4*%3, ...,
; %16*%15), accumulated by a serial chain of alternating reassoc+nsz fmul and
; fadd instructions.
; The expected code (matched in any order up to the final add) forms two
; accumulation chains: one seeded by a multiply-add onto register 1, the other
; by a plain fmuls, each extended by further fmadds. A closing fadds of the
; two chain results writes register 1 immediately before the return.
define dso_local float @reassociate_mamama_8(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8,
                                             float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16) {
; CHECK-LABEL: reassociate_mamama_8:
; CHECK:       # %bb.0:
; CHECK-DAG:    fmadds [[REG0:[0-9]+]], 3, 2, 1
; CHECK-DAG:    fmuls  [[REG1:[0-9]+]], 5, 4
; CHECK-DAG:    fmadds [[REG2:[0-9]+]], 7, 6, [[REG0]]
; CHECK-DAG:    fmadds [[REG3:[0-9]+]], 9, 8, [[REG1]]
;
; CHECK-DAG:    fmadds [[REG4:[0-9]+]], 13, 12, [[REG3]]
; CHECK-DAG:    fmadds [[REG5:[0-9]+]], 11, 10, [[REG2]]
;
; CHECK-DAG:    fmadds [[REG6:[0-9]+]], 3, 2, [[REG4]]
; CHECK-DAG:    fmadds [[REG7:[0-9]+]], 5, 4, [[REG5]]
; CHECK:        fadds 1, [[REG7]], [[REG6]]
; CHECK-NEXT:   blr
  %18 = fmul reassoc nsz float %2, %1
  %19 = fadd reassoc nsz float %18, %0
  %20 = fmul reassoc nsz float %4, %3
  %21 = fadd reassoc nsz float %19, %20
  %22 = fmul reassoc nsz float %6, %5
  %23 = fadd reassoc nsz float %21, %22
  %24 = fmul reassoc nsz float %8, %7
  %25 = fadd reassoc nsz float %23, %24
  %26 = fmul reassoc nsz float %10, %9
  %27 = fadd reassoc nsz float %25, %26
  %28 = fmul reassoc nsz float %12, %11
  %29 = fadd reassoc nsz float %27, %28
  %30 = fmul reassoc nsz float %14, %13
  %31 = fadd reassoc nsz float %29, %30
  %32 = fmul reassoc nsz float %16, %15
  %33 = fadd reassoc nsz float %31, %32
  ret float %33
}