; Regression test for the basic-block vectorizer (-bb-vectorize) applied to
; intrinsic calls. Each function below builds two independent scalar chains
; (sub/fsub -> intrinsic call -> add/fadd) and multiplies the two chain results.
; The FileCheck directives inside each function state whether the two chains
; are expected to be fused into <2 x double> / <2 x i64> operations.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; The required chain depth of 3 corresponds to the three-instruction chains
; used by every test. -instcombine and -gvn are listed after -bb-vectorize on
; the command line, so the directives describe the cleaned-up output.
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s

; Scalar floating-point intrinsics exercised by the tests.
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
declare double @llvm.round.f64(double)
declare double @llvm.copysign.f64(double, double)
declare double @llvm.ceil.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.rint.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.fabs.f64(double)
; Scalar integer intrinsics exercised by the tests.
declare i64 @llvm.bswap.i64(i64)
declare i64 @llvm.ctpop.i64(i64)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i64 @llvm.cttz.i64(i64, i1)

; Basic depth-3 chain with fma
; Lane i computes Zi = fma(Ai - Bi, Ai, Ci) + Bi; the function returns Z1 * Z2.
; Expected output: the A, B and C operands are packed into <2 x double>
; vectors and one @llvm.fma.v2f64 call replaces the two scalar calls; the two
; lanes are extracted again only for the final scalar fmul.
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @test1(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with fmuladd
; Same shape as the fma test: lane i computes Zi = fmuladd(Ai - Bi, Ai, Ci) + Bi
; and the function returns Z1 * Z2.
; Expected output: one @llvm.fmuladd.v2f64 call on packed <2 x double>
; operands, with the lanes extracted only for the final scalar fmul.
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
	%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @test1a(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with cos
; Lane i computes Zi = cos(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.cos.v2f64 call on the packed difference vector.
define double @test2(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.cos.f64(double %X1)
	%Y2 = call double @llvm.cos.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @test2(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi
; Lane i computes Zi = powi(Ai - Bi, P) + Bi; the function returns Z1 * Z2.
; Both calls pass the same i32 exponent %P, and the expected output keeps that
; exponent as a scalar operand of a single @llvm.powi.v2f64 call.
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @test3(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi (different powers: should not vectorize)
; The first call uses exponent %P and the second uses %P2 = %P + 1, so the two
; calls do not share their scalar i32 operand. The directives require that no
; <2 x double> value appears before the return.
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%P2 = add i32 %P, 1
	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @test4(
; CHECK-NOT: <2 x double>
; CHECK: ret double %R
}

; Basic depth-3 chain with round
; Lane i computes Zi = round(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.round.v2f64 call on the packed difference vector.
define double @testround(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.round.f64(double %X1)
	%Y2 = call double @llvm.round.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testround(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with copysign
; Lane i computes Zi = copysign(Ai - Bi, A1) + Bi; the function returns Z1 * Z2.
; Note that BOTH calls pass %A1 as the sign operand (the second lane does not
; use %A2). The directives depend on this: the sign vector %Y1.v.i1.2 is built
; by inserting %A1 into lane 1 of %X1.v.i0.1, whose lane 0 already holds %A1.
define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
	%Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testcopysign(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i1.2 = insertelement <2 x double> %X1.v.i0.1, double %A1, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with ceil
; Lane i computes Zi = ceil(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.ceil.v2f64 call on the packed difference vector.
define double @testceil(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.ceil.f64(double %X1)
	%Y2 = call double @llvm.ceil.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testceil(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with nearbyint
; Lane i computes Zi = nearbyint(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.nearbyint.v2f64 call on the packed difference
; vector.
define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.nearbyint.f64(double %X1)
	%Y2 = call double @llvm.nearbyint.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testnearbyint(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with rint
; Lane i computes Zi = rint(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.rint.v2f64 call on the packed difference vector.
define double @testrint(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.rint.f64(double %X1)
	%Y2 = call double @llvm.rint.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testrint(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with trunc
; Lane i computes Zi = trunc(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.trunc.v2f64 call on the packed difference vector.
define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.trunc.f64(double %X1)
	%Y2 = call double @llvm.trunc.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testtrunc(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with floor
; Lane i computes Zi = floor(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.floor.v2f64 call on the packed difference vector.
define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.floor.f64(double %X1)
	%Y2 = call double @llvm.floor.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testfloor(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with fabs
; Lane i computes Zi = fabs(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.fabs.v2f64 call on the packed difference vector.
define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
	%X1 = fsub double %A1, %B1
	%X2 = fsub double %A2, %B2
	%Y1 = call double @llvm.fabs.f64(double %X1)
	%Y2 = call double @llvm.fabs.f64(double %X2)
	%Z1 = fadd double %Y1, %B1
	%Z2 = fadd double %Y2, %B2
	%R  = fmul double %Z1, %Z2
	ret double %R
; CHECK-LABEL: @testfabs(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with bswap
; Integer variant of the chain: lane i computes Zi = bswap(Ai - Bi) + Bi and
; the function returns Z1 * Z2.
; Expected output: one @llvm.bswap.v2i64 call on the packed <2 x i64>
; difference vector.
define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
	%X1 = sub i64 %A1, %B1
	%X2 = sub i64 %A2, %B2
	%Y1 = call i64 @llvm.bswap.i64(i64 %X1)
	%Y2 = call i64 @llvm.bswap.i64(i64 %X2)
	%Z1 = add i64 %Y1, %B1
	%Z2 = add i64 %Y2, %B2
	%R  = mul i64 %Z1, %Z2
	ret i64 %R
; CHECK-LABEL: @testbswap(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with ctpop
; Lane i computes Zi = ctpop(Ai - Bi) + Bi; the function returns Z1 * Z2.
; Expected output: one @llvm.ctpop.v2i64 call on the packed <2 x i64>
; difference vector.
define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
	%X1 = sub i64 %A1, %B1
	%X2 = sub i64 %A2, %B2
	%Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
	%Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
	%Z1 = add i64 %Y1, %B1
	%Z2 = add i64 %Y2, %B2
	%R  = mul i64 %Z1, %Z2
	ret i64 %R
; CHECK-LABEL: @testctpop(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with ctlz
; Lane i computes Zi = ctlz(Ai - Bi, true) + Bi; the function returns Z1 * Z2.
; Both calls pass the same constant i1 flag (true), and the expected output
; keeps that flag as the scalar second operand of one @llvm.ctlz.v2i64 call.
define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
	%X1 = sub i64 %A1, %B1
	%X2 = sub i64 %A2, %B2
	%Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
	%Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
	%Z1 = add i64 %Y1, %B1
	%Z2 = add i64 %Y2, %B2
	%R  = mul i64 %Z1, %Z2
	ret i64 %R
; CHECK-LABEL: @testctlz(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with ctlz (different i1 flags: should not vectorize)
; Negative counterpart of the ctlz test: the first call passes i1 true and the
; second passes i1 false, so the two calls do not share their scalar flag
; operand. The directives require every instruction to remain in its original
; scalar form.
define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
	%X1 = sub i64 %A1, %B1
	%X2 = sub i64 %A2, %B2
	%Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
	%Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
	%Z1 = add i64 %Y1, %B1
	%Z2 = add i64 %Y2, %B2
	%R  = mul i64 %Z1, %Z2
	ret i64 %R
; CHECK-LABEL: @testctlzneg(
; CHECK: %X1 = sub i64 %A1, %B1
; CHECK: %X2 = sub i64 %A2, %B2
; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
; CHECK: %Z1 = add i64 %Y1, %B1
; CHECK: %Z2 = add i64 %Y2, %B2
; CHECK: %R = mul i64 %Z1, %Z2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with cttz
; Lane i computes Zi = cttz(Ai - Bi, true) + Bi; the function returns Z1 * Z2.
; Both calls pass the same constant i1 flag (true), and the expected output
; keeps that flag as the scalar second operand of one @llvm.cttz.v2i64 call.
define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
	%X1 = sub i64 %A1, %B1
	%X2 = sub i64 %A2, %B2
	%Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
	%Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
	%Z1 = add i64 %Y1, %B1
	%Z2 = add i64 %Y2, %B2
	%R  = mul i64 %Z1, %Z2
	ret i64 %R
; CHECK-LABEL: @testcttz(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with cttz (different i1 flags: should not vectorize)
; Negative counterpart of the cttz test: the first call passes i1 true and the
; second passes i1 false, so the two calls do not share their scalar flag
; operand. The directives require every instruction to remain in its original
; scalar form.
define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
	%X1 = sub i64 %A1, %B1
	%X2 = sub i64 %A2, %B2
	%Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
	%Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
	%Z1 = add i64 %Y1, %B1
	%Z2 = add i64 %Y2, %B2
	%R  = mul i64 %Z1, %Z2
	ret i64 %R
; CHECK-LABEL: @testcttzneg(
; CHECK: %X1 = sub i64 %A1, %B1
; CHECK: %X2 = sub i64 %A2, %B2
; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
; CHECK: %Z1 = add i64 %Y1, %B1
; CHECK: %Z2 = add i64 %Y2, %B2
; CHECK: %R = mul i64 %Z1, %Z2
; CHECK: ret i64 %R
}

; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
; CHECK: attributes #0 = { nounwind readnone }
