; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
; We have to assume that the libm sqrtf/sqrt can set errno, so a libcall must
; remain for invalid inputs. But it is better for perf to branch on whether the
; argument is valid (non-negative) than to run sqrtss/sqrtsd and then test the
; result for NaN.
; Note: This is really a test of the -partially-inline-libcalls IR pass (and we
; have an IR test for that), but we check the final asm to make sure it comes
; out as expected too.
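;
; As a minimal sketch (the exact IR is covered by the pass's own test), the
; guarded form that reaches the backend looks roughly like:
;
;   %guard = fcmp oge float %val, 0.0     ; false for negatives and for NaN
;   br i1 %guard, label %.split, label %call.sqrt
; .split:                                 ; fast path: intrinsic cannot set errno
;   %fast = call float @llvm.sqrt.f32(float %val)
; call.sqrt:                              ; slow path: libcall may set errno
;   %slow = tail call float @sqrtf(float %val)
;
; with the two results merged at the return.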

define float @f(float %val) nounwind {
; SSE-LABEL: f:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    jb .LBB0_2
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
; SSE-NEXT:  .LBB0_2: # %call.sqrt
; SSE-NEXT:    jmp sqrtf # TAILCALL
;
; AVX-LABEL: f:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    jb .LBB0_2
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB0_2: # %call.sqrt
; AVX-NEXT:    jmp sqrtf # TAILCALL
  %res = tail call float @sqrtf(float %val)
  ret float %res
}

define double @d(double %val) nounwind {
; SSE-LABEL: d:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    jb .LBB1_2
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
; SSE-NEXT:  .LBB1_2: # %call.sqrt
; SSE-NEXT:    jmp sqrt # TAILCALL
;
; AVX-LABEL: d:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    jb .LBB1_2
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB1_2: # %call.sqrt
; AVX-NEXT:    jmp sqrt # TAILCALL
  %res = tail call double @sqrt(double %val)
  ret double %res
}
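
; The llvm.sqrt intrinsic never sets errno, so the fast-math tests below need
; no argument guard or libcall; they exercise how the partial register update
; of sqrtsd is handled under minsize instead.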

define double @minsize(double %x, double %y) minsize {
; SSE-LABEL: minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm0
; SSE-NEXT:    mulsd %xmm1, %xmm1
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t3 = fmul fast double %x, %x
  %t4 = fmul fast double %y, %y
  %t5 = fadd fast double %t3, %t4
  %t6 = tail call fast double @llvm.sqrt.f64(double %t5)
  ret double %t6
}

; sqrtsd only writes the low half of its destination register, leaving a false
; dependency on the register's previous contents. Avoiding that partial reg
; dependency may involve register allocation rather than adding an instruction.
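;
; As a rough sketch (not what this minsize test checks), without minsize the
; backend would typically break the dependency with an extra instruction:
;
;   xorps %xmm0, %xmm0        # clear xmm0 to break the false dependency
;   sqrtsd %xmm1, %xmm0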

define double @partial_dep_minsize(double %x, double %y) minsize {
; SSE-LABEL: partial_dep_minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: partial_dep_minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm0
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t6 = tail call fast double @llvm.sqrt.f64(double %y)
  %t = fadd fast double %t6, %y
  ret double %t
}

declare dso_local float @sqrtf(float)
declare dso_local double @sqrt(double)
declare dso_local double @llvm.sqrt.f64(double)