1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -instcombine < %s | FileCheck %s
3
; Declarations used by the tests below: the scalar and <2 x float> sqrt
; intrinsics, and an external function that the tests call to give a value
; an additional use.
4declare double @llvm.sqrt.f64(double) nounwind readnone speculatable
5declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
6declare void @use(double)
7
8; sqrt(a) * sqrt(b) no math flags
9
; Input: two sqrt calls multiplied by an fmul; none of the three instructions
; carries fast-math flags. The expected output is the same three instructions,
; i.e. the product of square roots is left as written.
10define double @sqrt_a_sqrt_b(double %a, double %b) {
11; CHECK-LABEL: @sqrt_a_sqrt_b(
12; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
13; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
14; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
15; CHECK-NEXT:    ret double [[MUL]]
16;
17  %1 = call double @llvm.sqrt.f64(double %a)
18  %2 = call double @llvm.sqrt.f64(double %b)
19  %mul = fmul double %1, %2
20  ret double %mul
21}
22
23; sqrt(a) * sqrt(b) fast-math, multiple uses
24
; Input: both sqrt calls and the fmul are 'fast', but %2 (the sqrt of %b) is
; also passed to @use. The expected output keeps both sqrt calls and the fmul.
25define double @sqrt_a_sqrt_b_multiple_uses(double %a, double %b) {
26; CHECK-LABEL: @sqrt_a_sqrt_b_multiple_uses(
27; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[A:%.*]])
28; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.sqrt.f64(double [[B:%.*]])
29; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
30; CHECK-NEXT:    call void @use(double [[TMP2]])
31; CHECK-NEXT:    ret double [[MUL]]
32;
33  %1 = call fast double @llvm.sqrt.f64(double %a)
34  %2 = call fast double @llvm.sqrt.f64(double %b)
35  %mul = fmul fast double %1, %2
36  call void @use(double %2)
37  ret double %mul
38}
39
40; sqrt(a) * sqrt(b) => sqrt(a*b) with fast-math ('reassoc nnan' on the fmul is enough)
41
; Input: the sqrt calls have no flags; only the fmul has 'reassoc nnan'.
; Expected output: one fmul of %a and %b feeding a single sqrt call, with
; 'reassoc nnan' on both new instructions.
42define double @sqrt_a_sqrt_b_reassoc_nnan(double %a, double %b) {
43; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc_nnan(
44; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan double [[A:%.*]], [[B:%.*]]
45; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc nnan double @llvm.sqrt.f64(double [[TMP1]])
46; CHECK-NEXT:    ret double [[TMP2]]
47;
48  %1 = call double @llvm.sqrt.f64(double %a)
49  %2 = call double @llvm.sqrt.f64(double %b)
50  %mul = fmul reassoc nnan double %1, %2
51  ret double %mul
52}
53
54; Negative test - require 'nnan': it rules out both operands being negative,
55; so we won't return a number (sqrt(a*b)) when the answer should be NaN.
56
; Input: the fmul has 'reassoc' only (no 'nnan'); the sqrt calls have no flags.
; The expected output keeps both sqrt calls and the fmul as written.
57define double @sqrt_a_sqrt_b_reassoc(double %a, double %b) {
58; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc(
59; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
60; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
61; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[TMP1]], [[TMP2]]
62; CHECK-NEXT:    ret double [[MUL]]
63;
64  %1 = call double @llvm.sqrt.f64(double %a)
65  %2 = call double @llvm.sqrt.f64(double %b)
66  %mul = fmul reassoc double %1, %2
67  ret double %mul
68}
69
70; sqrt(a) * sqrt(b) * sqrt(c) * sqrt(d) => sqrt(a*b*c*d) with fast-math
71; 'reassoc nnan' on the fmuls is all that is required, but check propagation of other FMF.
72
; Input: a chain of three fmuls over four flagless sqrt calls. Every fmul has
; 'reassoc nnan'; the first additionally has 'arcp' and the last 'ninf'.
; Expected output: three fmuls over %a, %b, %c, %d, each carrying the flags of
; the corresponding input fmul, followed by one sqrt call carrying the flags
; of the final fmul ('reassoc nnan ninf').
73define double @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(double %a, double %b, double %c, double %d) {
74; CHECK-LABEL: @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(
75; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan arcp double [[A:%.*]], [[B:%.*]]
76; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan double [[TMP1]], [[C:%.*]]
77; CHECK-NEXT:    [[TMP3:%.*]] = fmul reassoc nnan ninf double [[TMP2]], [[D:%.*]]
78; CHECK-NEXT:    [[TMP4:%.*]] = call reassoc nnan ninf double @llvm.sqrt.f64(double [[TMP3]])
79; CHECK-NEXT:    ret double [[TMP4]]
80;
81  %1 = call double @llvm.sqrt.f64(double %a)
82  %2 = call double @llvm.sqrt.f64(double %b)
83  %3 = call double @llvm.sqrt.f64(double %c)
84  %4 = call double @llvm.sqrt.f64(double %d)
85  %mul = fmul reassoc nnan arcp double %1, %2
86  %mul1 = fmul reassoc nnan double %mul, %3
87  %mul2 = fmul reassoc nnan ninf double %mul1, %4
88  ret double %mul2
89}
90
; (1.0 / sqrt(x)) * (1.0 / sqrt(x)) with 'fast' on the sqrt call, the fdiv
; and the fmul.
; Expected output: a single 'fast' fdiv of 1.0 by x; no sqrt call remains.
91define double @rsqrt_squared(double %x) {
92; CHECK-LABEL: @rsqrt_squared(
93; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]]
94; CHECK-NEXT:    ret double [[SQUARED]]
95;
96  %sqrt = call fast double @llvm.sqrt.f64(double %x)
97  %rsqrt = fdiv fast double 1.0, %sqrt
98  %squared = fmul fast double %rsqrt, %rsqrt
99  ret double %squared
100}
101
; (y / sqrt(x)) * (y / sqrt(x)): the sqrt is the divisor. Only the fmul carries
; flags ('reassoc nnan nsz'); the sqrt call and the fdiv have none.
; Expected output: (y * y) / x, with the fmul's flags on both instructions.
102define double @sqrt_divisor_squared(double %x, double %y) {
103; CHECK-LABEL: @sqrt_divisor_squared(
104; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan nsz double [[Y:%.*]], [[Y]]
105; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv reassoc nnan nsz double [[TMP1]], [[X:%.*]]
106; CHECK-NEXT:    ret double [[SQUARED]]
107;
108  %sqrt = call double @llvm.sqrt.f64(double %x)
109  %div = fdiv double %y, %sqrt
110  %squared = fmul reassoc nnan nsz double %div, %div
111  ret double %squared
112}
113
; <2 x float> case with the sqrt as the dividend:
; (sqrt(x) / y) * (sqrt(x) / y), with 'fast' on the fdiv and the fmul but not
; on the sqrt call.
; Expected output: x / (y * y), with 'fast' on both instructions.
114define <2 x float> @sqrt_dividend_squared(<2 x float> %x, <2 x float> %y) {
115; CHECK-LABEL: @sqrt_dividend_squared(
116; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x float> [[Y:%.*]], [[Y]]
117; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv fast <2 x float> [[X:%.*]], [[TMP1]]
118; CHECK-NEXT:    ret <2 x float> [[SQUARED]]
119;
120  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
121  %div = fdiv fast <2 x float> %sqrt, %y
122  %squared = fmul fast <2 x float> %div, %div
123  ret <2 x float> %squared
124}
125
126; We do not transform this because it would result in an extra instruction.
127; This might still be a good optimization for the backend.
128
; (y / sqrt(x)) squared with 'reassoc nnan nsz' on the fmul, where %div is
; also passed to @use. The expected output is the input, unchanged.
129define double @sqrt_divisor_squared_extra_use(double %x, double %y) {
130; CHECK-LABEL: @sqrt_divisor_squared_extra_use(
131; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
132; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
133; CHECK-NEXT:    call void @use(double [[DIV]])
134; CHECK-NEXT:    [[SQUARED:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
135; CHECK-NEXT:    ret double [[SQUARED]]
136;
137  %sqrt = call double @llvm.sqrt.f64(double %x)
138  %div = fdiv double %y, %sqrt
139  call void @use(double %div)
140  %squared = fmul reassoc nnan nsz double %div, %div
141  ret double %squared
142}
143
; (sqrt(x) / y) squared where the extra use is on the sqrt call itself, not on
; the fdiv. Expected output: the sqrt call remains for @use, and the squared
; result is computed as x / (y * y) with 'fast' on both instructions.
144define double @sqrt_dividend_squared_extra_use(double %x, double %y) {
145; CHECK-LABEL: @sqrt_dividend_squared_extra_use(
146; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
147; CHECK-NEXT:    call void @use(double [[SQRT]])
148; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[Y:%.*]], [[Y]]
149; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv fast double [[X]], [[TMP1]]
150; CHECK-NEXT:    ret double [[SQUARED]]
151;
152  %sqrt = call double @llvm.sqrt.f64(double %x)
153  call void @use(double %sqrt)
154  %div = fdiv fast double %sqrt, %y
155  %squared = fmul fast double %div, %div
156  ret double %squared
157}
158
159; Negative test - require 'nsz'.
160
; (y / sqrt(x)) squared where the fmul has only 'reassoc nnan'.
; The expected output is the input, unchanged.
161define double @sqrt_divisor_not_enough_FMF(double %x, double %y) {
162; CHECK-LABEL: @sqrt_divisor_not_enough_FMF(
163; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
164; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
165; CHECK-NEXT:    [[SQUARED:%.*]] = fmul reassoc nnan double [[DIV]], [[DIV]]
166; CHECK-NEXT:    ret double [[SQUARED]]
167;
168  %sqrt = call double @llvm.sqrt.f64(double %x)
169  %div = fdiv double %y, %sqrt
170  %squared = fmul reassoc nnan double %div, %div
171  ret double %squared
172}
173
174; TODO: This is a special case of the general pattern. If we have a constant
175; operand, the extra use limitation could be eased because this does not
176; result in an extra instruction (1.0 * 1.0 is constant folded).
177
; (1.0 / sqrt(x)) squared, all instructions 'fast', with %rsqrt also passed to
; @use. The expected output is the input, unchanged: the sqrt call, the fdiv
; and the fmul all remain.
178define double @rsqrt_squared_extra_use(double %x) {
179; CHECK-LABEL: @rsqrt_squared_extra_use(
180; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
181; CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]]
182; CHECK-NEXT:    call void @use(double [[RSQRT]])
183; CHECK-NEXT:    [[SQUARED:%.*]] = fmul fast double [[RSQRT]], [[RSQRT]]
184; CHECK-NEXT:    ret double [[SQUARED]]
185;
186  %sqrt = call fast double @llvm.sqrt.f64(double %x)
187  %rsqrt = fdiv fast double 1.0, %sqrt
188  call void @use(double %rsqrt)
189  %squared = fmul fast double %rsqrt, %rsqrt
190  ret double %squared
191}
192