; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s

declare double @llvm.sqrt.f64(double) nounwind readnone speculatable
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
; External function called by the tests to give a value an additional use.
declare void @use(double)

; sqrt(a) * sqrt(b) no math flags
; Negative test: none of the instructions carry fast-math flags, and the
; expected output keeps both sqrt calls and the fmul as written.

define double @sqrt_a_sqrt_b(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b(
; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret double [[MUL]]
;
  %1 = call double @llvm.sqrt.f64(double %a)
  %2 = call double @llvm.sqrt.f64(double %b)
  %mul = fmul double %1, %2
  ret double %mul
}

; sqrt(a) * sqrt(b) fast-math, multiple uses
; Negative test: every instruction is 'fast', but the second sqrt result is
; also passed to @use; the expected output keeps both sqrt calls and the fmul.

define double @sqrt_a_sqrt_b_multiple_uses(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b_multiple_uses(
; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.sqrt.f64(double [[B:%.*]])
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    call void @use(double [[TMP2]])
; CHECK-NEXT:    ret double [[MUL]]
;
  %1 = call fast double @llvm.sqrt.f64(double %a)
  %2 = call fast double @llvm.sqrt.f64(double %b)
  %mul = fmul fast double %1, %2
  call void @use(double %2)
  ret double %mul
}

; sqrt(a) * sqrt(b) => sqrt(a*b) with fast-math
; Only the fmul carries 'reassoc nnan'; the sqrt calls have no flags. The
; expected output is a single sqrt of a*b, with 'reassoc nnan' on both the new
; fmul and the new sqrt call.

define double @sqrt_a_sqrt_b_reassoc_nnan(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc_nnan(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc nnan double @llvm.sqrt.f64(double [[TMP1]])
; CHECK-NEXT:    ret double [[TMP2]]
;
  %1 = call double @llvm.sqrt.f64(double %a)
  %2 = call double @llvm.sqrt.f64(double %b)
  %mul = fmul reassoc nnan double %1, %2
  ret double %mul
}
; nnan disallows the possibility that both operands are negative,
; so we won't return a number when the answer should be NaN.
; Negative test: the fmul has 'reassoc' but not 'nnan', and the expected output
; is unchanged.

define double @sqrt_a_sqrt_b_reassoc(double %a, double %b) {
; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret double [[MUL]]
;
  %1 = call double @llvm.sqrt.f64(double %a)
  %2 = call double @llvm.sqrt.f64(double %b)
  %mul = fmul reassoc double %1, %2
  ret double %mul
}

; sqrt(a) * sqrt(b) * sqrt(c) * sqrt(d) => sqrt(a*b*c*d) with fast-math
; 'reassoc nnan' on the fmuls is all that is required, but check propagation of other FMF.

define double @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan arcp double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan double [[TMP1]], [[C:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = fmul reassoc nnan ninf double [[TMP2]], [[D:%.*]]
; CHECK-NEXT:    [[TMP4:%.*]] = call reassoc nnan ninf double @llvm.sqrt.f64(double [[TMP3]])
; CHECK-NEXT:    ret double [[TMP4]]
;
  %1 = call double @llvm.sqrt.f64(double %a)
  %2 = call double @llvm.sqrt.f64(double %b)
  %3 = call double @llvm.sqrt.f64(double %c)
  %4 = call double @llvm.sqrt.f64(double %d)
  %mul = fmul reassoc nnan arcp double %1, %2
  %mul1 = fmul reassoc nnan double %mul, %3
  %mul2 = fmul reassoc nnan ninf double %mul1, %4
  ret double %mul2
}

; (1.0 / sqrt(x)) * (1.0 / sqrt(x)) => 1.0 / x when every instruction is 'fast'.

define double @rsqrt_squared(double %x) {
; CHECK-LABEL: @rsqrt_squared(
; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]]
; CHECK-NEXT:    ret double [[SQUARED]]
;
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv fast double 1.0, %sqrt
  %squared = fmul fast double %rsqrt, %rsqrt
  ret double %squared
}

; (1.0 / sqrt(x)) * x => x / sqrt(x) with 'reassoc nsz' on the fmul only.
; The reciprocal is also stored, so it stays in the expected output.
; The dividend of the new fdiv has to be the same value that feeds the sqrt,
; so the expected output reuses the captured X instead of capturing it again.

define double @rsqrt_x_reassociate_extra_use(double %x, double * %p) {
; CHECK-LABEL: @rsqrt_x_reassociate_extra_use(
; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT:    [[RSQRT:%.*]] = fdiv double 1.000000e+00, [[SQRT]]
; CHECK-NEXT:    [[RES:%.*]] = fdiv reassoc nsz double [[X]], [[SQRT]]
; CHECK-NEXT:    store double [[RSQRT]], double* [[P:%.*]], align 8
; CHECK-NEXT:    ret double [[RES]]
;
  %sqrt = call double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv double 1.0, %sqrt
  %res = fmul reassoc nsz double %rsqrt, %x
  store double %rsqrt, double* %p
  ret double %res
}

; Vector variant of the previous test with 'fast' on every instruction:
; (x + y) * (1.0 / sqrt(x + y)) => (x + y) / sqrt(x + y).
; The reciprocal is also stored, so it stays in the expected output.

define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x float> %y, <2 x float>* %p) {
; CHECK-LABEL: @x_add_y_rsqrt_reassociate_extra_use(
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]])
; CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT]]
; CHECK-NEXT:    [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]]
; CHECK-NEXT:    store <2 x float> [[RSQRT]], <2 x float>* [[P:%.*]], align 8
; CHECK-NEXT:    ret <2 x float> [[RES]]
;
  %add = fadd fast <2 x float> %x, %y ; thwart complexity-based canonicalization
  %sqrt = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %add)
  %rsqrt = fdiv fast <2 x float> <float 1.0, float 1.0>, %sqrt
  %res = fmul fast <2 x float> %add, %rsqrt
  store <2 x float> %rsqrt, <2 x float>* %p
  ret <2 x float> %res
}

; (y / sqrt(x)) * (y / sqrt(x)) => (y * y) / x with 'reassoc nnan nsz' on the
; fmul only; the flags appear on both new instructions.

define double @sqrt_divisor_squared(double %x, double %y) {
; CHECK-LABEL: @sqrt_divisor_squared(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nnan nsz double [[Y:%.*]], [[Y]]
; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv reassoc nnan nsz double [[TMP1]], [[X:%.*]]
; CHECK-NEXT:    ret double [[SQUARED]]
;
  %sqrt = call double @llvm.sqrt.f64(double %x)
  %div = fdiv double %y, %sqrt
  %squared = fmul reassoc nnan nsz double %div, %div
  ret double %squared
}

; (sqrt(x) / y) * (sqrt(x) / y) => x / (y * y), vector type, with 'fast' on the
; fdiv and the fmul.

define <2 x float> @sqrt_dividend_squared(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @sqrt_dividend_squared(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x float> [[Y:%.*]], [[Y]]
; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv fast <2 x float> [[X:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <2 x float> [[SQUARED]]
;
  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  %div = fdiv fast <2 x float> %sqrt, %y
  %squared = fmul fast <2 x float> %div, %div
  ret <2 x float> %squared
}

; We do not transform this because it would result in an extra instruction.
; This might still be a good optimization for the backend.

define double @sqrt_divisor_squared_extra_use(double %x, double %y) {
; CHECK-LABEL: @sqrt_divisor_squared_extra_use(
; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
; CHECK-NEXT:    call void @use(double [[DIV]])
; CHECK-NEXT:    [[SQUARED:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
; CHECK-NEXT:    ret double [[SQUARED]]
;
  %sqrt = call double @llvm.sqrt.f64(double %x)
  %div = fdiv double %y, %sqrt
  call void @use(double %div)
  %squared = fmul reassoc nnan nsz double %div, %div
  ret double %squared
}

; Here the extra use is on the sqrt rather than on the fdiv. The expected
; output still folds to x / (y * y) and keeps the sqrt call for @use.

define double @sqrt_dividend_squared_extra_use(double %x, double %y) {
; CHECK-LABEL: @sqrt_dividend_squared_extra_use(
; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT:    call void @use(double [[SQRT]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[Y:%.*]], [[Y]]
; CHECK-NEXT:    [[SQUARED:%.*]] = fdiv fast double [[X]], [[TMP1]]
; CHECK-NEXT:    ret double [[SQUARED]]
;
  %sqrt = call double @llvm.sqrt.f64(double %x)
  call void @use(double %sqrt)
  %div = fdiv fast double %sqrt, %y
  %squared = fmul fast double %div, %div
  ret double %squared
}

; Negative test - require 'nsz'.

define double @sqrt_divisor_not_enough_FMF(double %x, double %y) {
; CHECK-LABEL: @sqrt_divisor_not_enough_FMF(
; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
; CHECK-NEXT:    [[SQUARED:%.*]] = fmul reassoc nnan double [[DIV]], [[DIV]]
; CHECK-NEXT:    ret double [[SQUARED]]
;
  %sqrt = call double @llvm.sqrt.f64(double %x)
  %div = fdiv double %y, %sqrt
  %squared = fmul reassoc nnan double %div, %div
  ret double %squared
}

; TODO: This is a special-case of the general pattern. If we have a constant
; operand, the extra use limitation could be eased because this does not
; result in an extra instruction (1.0 * 1.0 is constant folded).

define double @rsqrt_squared_extra_use(double %x) {
; CHECK-LABEL: @rsqrt_squared_extra_use(
; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]]
; CHECK-NEXT:    call void @use(double [[RSQRT]])
; CHECK-NEXT:    [[SQUARED:%.*]] = fmul fast double [[RSQRT]], [[RSQRT]]
; CHECK-NEXT:    ret double [[SQUARED]]
;
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv fast double 1.0, %sqrt
  call void @use(double %rsqrt)
  %squared = fmul fast double %rsqrt, %rsqrt
  ret double %squared
}