; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
3
; Exponent is a variable: base and exponent are both loaded from memory, so the
; MASSV pow entry is expected to remain a library call, emitted under its
; CPU-specific name (__powd2_P9 for -mcpu=pwr9, __powd2_P8 for -mcpu=pwr8).
; The label and return checks are spelled once per enabled prefix because
; neither FileCheck invocation in this file enables a bare, unsuffixed prefix.
define void @vpow_var(double* nocapture %z, double* nocapture readonly %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_var
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_var
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: z[i..i+1] = pow(x[i..i+1], y[i..i+1]).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %z, i64 %index
  %next.gep31 = getelementptr double, double* %y, i64 %index
  %next.gep32 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep32 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  %1 = bitcast double* %next.gep31 to <2 x double>*
  %wide.load33 = load <2 x double>, <2 x double>* %1, align 8
  %2 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> %wide.load33)
  %3 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %2, <2 x double>* %3, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %4 = icmp eq i64 %index.next, 1024
  br i1 %4, label %for.end, label %vector.body

for.end:
  ret void
}
32
; Exponent is a splat constant (0.76 in both lanes) that is neither 0.75 nor
; 0.25, so the MASSV pow entry is expected to remain a library call under its
; CPU-specific name. The label and return checks are spelled once per enabled
; prefix because no FileCheck invocation in this file enables a bare prefix.
define void @vpow_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_const
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_const
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], 0.76).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.600000e-01, double 7.600000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
58
; Exponent is a non-splat constant vector (0.77, 0.76): the lanes differ and
; neither is 0.75 or 0.25, so the MASSV pow entry is expected to remain a
; library call under its CPU-specific name. The label and return checks are
; spelled once per enabled prefix because no FileCheck invocation in this file
; enables a bare prefix.
define void @vpow_noeq_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_noeq_const
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_noeq_const
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], <0.77, 0.76>).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.600000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
84
; Exponent is a non-splat constant vector (0.77, 0.75): one lane is 0.75 but the
; other is not, so the exponent is not a uniform 0.75 and the MASSV pow entry is
; expected to remain a library call under its CPU-specific name. The label and
; return checks are spelled once per enabled prefix because no FileCheck
; invocation in this file enables a bare prefix.
define void @vpow_noeq075_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_noeq075_const
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_noeq075_const
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], <0.77, 0.75>).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 7.500000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
110
; Exponent is a non-splat constant vector (0.77, 0.25): one lane is 0.25 but the
; other is not, so the exponent is not a uniform 0.25 and the MASSV pow entry is
; expected to remain a library call under its CPU-specific name. The label and
; return checks are spelled once per enabled prefix because no FileCheck
; invocation in this file enables a bare prefix.
define void @vpow_noeq025_const(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_noeq025_const
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_noeq025_const
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], <0.77, 0.25>).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.700000e-01, double 2.500000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
136
; Exponent is a splat 0.75 and the call carries the ninf and afn flags. The
; expectation is that no MASSV pow library call is emitted and that a vector
; square-root instruction appears instead.
;
; Every directive here is spelled once per enabled prefix: previously all of
; them used a bare prefix that no FileCheck invocation in this file enables, so
; this function was not verified at all.
;
; The earlier expectation named the single-precision mnemonic xvrsqrtesp even
; though the operation is on <2 x double>. The pattern below accepts the
; double-precision square-root or reciprocal-square-root-estimate mnemonic.
; NOTE(review): confirm against real llc output for both CPUs and tighten the
; pattern to the exact mnemonic.
define void @vpow_075(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_075
; CHECK-PWR9-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR9:        xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR9-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_075
; CHECK-PWR8-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR8:        xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR8-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], 0.75).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  ; Note: this call intentionally has no nsz flag, unlike the 0.25 variant.
  %1 = call ninf afn <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.500000e-01, double 7.500000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
162
; Exponent is a splat 0.25 and the call carries the ninf, afn and nsz flags.
; The expectation is that no MASSV pow library call is emitted and that a
; vector square-root instruction appears instead.
;
; Every directive here is spelled once per enabled prefix: previously all of
; them used a bare prefix that no FileCheck invocation in this file enables, so
; this function was not verified at all.
;
; The earlier expectation named the single-precision mnemonic xvrsqrtesp even
; though the operation is on <2 x double>. The pattern below accepts the
; double-precision square-root or reciprocal-square-root-estimate mnemonic.
; NOTE(review): confirm against real llc output for both CPUs and tighten the
; pattern to the exact mnemonic.
define void @vpow_025(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_025
; CHECK-PWR9-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR9:        xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR9-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_025
; CHECK-PWR8-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR8:        xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR8-NOT:    bl __powd2_P{{[89]}}
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], 0.25).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 2.500000e-01, double 2.500000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
188
; Exponent is a splat 0.75, but the call carries no fast-math flags, so the
; MASSV pow entry is expected to remain a library call under its CPU-specific
; name and no vector square-root instruction should be emitted.
;
; The label, return and negative checks are spelled once per enabled prefix
; because no FileCheck invocation in this file enables a bare prefix. The
; negative pattern covers the double-precision square-root and
; reciprocal-square-root-estimate mnemonics; the earlier single-precision
; mnemonic could never appear for a <2 x double> operation.
define void @vpow_075_nofast(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_075_nofast
; CHECK-PWR9-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_075_nofast
; CHECK-PWR8-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], 0.75).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  ; No fast-math flags on this call: that is the property under test.
  %1 = call <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 7.500000e-01, double 7.500000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
215
; Exponent is a splat 0.25, but the call carries no fast-math flags, so the
; MASSV pow entry is expected to remain a library call under its CPU-specific
; name and no vector square-root instruction should be emitted.
;
; The label, return and negative checks are spelled once per enabled prefix
; because no FileCheck invocation in this file enables a bare prefix. The
; negative pattern covers the double-precision square-root and
; reciprocal-square-root-estimate mnemonics; the earlier single-precision
; mnemonic could never appear for a <2 x double> operation.
define void @vpow_025_nofast(double* nocapture %y, double* nocapture readonly %x) {
; CHECK-PWR9-LABEL:  @vpow_025_nofast
; CHECK-PWR9-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR9:        bl __powd2_P9
; CHECK-PWR9-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR9:        blr
; CHECK-PWR8-LABEL:  @vpow_025_nofast
; CHECK-PWR8-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR8:        bl __powd2_P8
; CHECK-PWR8-NOT:    xv{{r?}}sqrt{{e?}}dp
; CHECK-PWR8:        blr
entry:
  br label %vector.body

; Each iteration handles two doubles: y[i..i+1] = pow(x[i..i+1], 0.25).
vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr double, double* %y, i64 %index
  %next.gep19 = getelementptr double, double* %x, i64 %index
  %0 = bitcast double* %next.gep19 to <2 x double>*
  %wide.load = load <2 x double>, <2 x double>* %0, align 8
  ; No fast-math flags on this call: that is the property under test.
  %1 = call <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> <double 2.500000e-01, double 2.500000e-01>)
  %2 = bitcast double* %next.gep to <2 x double>*
  store <2 x double> %1, <2 x double>* %2, align 8
  ; Advance by the vector width; the loop ends when the index reaches 1024.
  %index.next = add i64 %index, 2
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}
242
; Two-lane double-precision pow entry point that every test function in this
; file calls; the checks expect it to be emitted under a CPU-specific name
; (__powd2_P9 or __powd2_P8) when it stays a library call.
; Function Attrs: nounwind readnone speculatable willreturn
; NOTE(review): attribute group #1 is referenced below, but no definition of it
; is visible in this part of the file. Confirm it is defined, otherwise the
; attributes listed above may not actually be attached to this declaration.
declare <2 x double> @__powd2_massv(<2 x double>, <2 x double>) #1
245