; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
; We have to assume that errno can be set, so we have to make a libcall in that case.
; But it's better for perf to check that the argument is valid rather than the result of
; sqrtss/sqrtsd.
; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test
; for that), but we're checking the final asm to make sure that comes out as expected too.

define float @f(float %val) nounwind {
; SSE-LABEL: f:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    jb .LBB0_2
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
; SSE-NEXT:  .LBB0_2: # %call.sqrt
; SSE-NEXT:    jmp sqrtf # TAILCALL
;
; AVX-LABEL: f:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    jb .LBB0_2
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB0_2: # %call.sqrt
; AVX-NEXT:    jmp sqrtf # TAILCALL
  %res = tail call float @sqrtf(float %val)
  ret float %res
}

define double @d(double %val) nounwind {
; SSE-LABEL: d:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    jb .LBB1_2
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
; SSE-NEXT:  .LBB1_2: # %call.sqrt
; SSE-NEXT:    jmp sqrt # TAILCALL
;
; AVX-LABEL: d:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    jb .LBB1_2
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB1_2: # %call.sqrt
; AVX-NEXT:    jmp sqrt # TAILCALL
  %res = tail call double @sqrt(double %val)
  ret double %res
}

define double @minsize(double %x, double %y) minsize {
; SSE-LABEL: minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm0
; SSE-NEXT:    mulsd %xmm1, %xmm1
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t3 = fmul fast double %x, %x
  %t4 = fmul fast double %y, %y
  %t5 = fadd fast double %t3, %t4
  %t6 = tail call fast double @llvm.sqrt.f64(double %t5)
  ret double %t6
}

; Partial reg avoidance may involve register allocation
; rather than adding an instruction.

define double @partial_dep_minsize(double %x, double %y) minsize {
; SSE-LABEL: partial_dep_minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: partial_dep_minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm0
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t6 = tail call fast double @llvm.sqrt.f64(double %y)
  %t = fadd fast double %t6, %y
  ret double %t
}

declare dso_local float @sqrtf(float)
declare dso_local double @sqrt(double)
declare dso_local double @llvm.sqrt.f64(double)