; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s

; Codegen checks for the llvm.amdgcn.rcp intrinsic (f32 and f64) and for
; reciprocal-shaped divisions (fdiv 1.0, x) on the amdgcn target, under the
; different unsafe-fp-math / f32 denormal-mode attribute groups defined at the
; bottom of this file.

declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; rcp of an undef operand: no v_rcp_f32 instruction is expected in the output.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI-NOT: v_rcp_f32
define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp of the constant 2.0: expected to appear as the immediate 0.5 in a
; v_mov_b32, with no v_rcp_f32 emitted.
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp of the constant 10.0: expected to appear as the immediate 0x3dcccccd
; (the f32 bit pattern closest to 0.1), with no v_rcp_f32 emitted.
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; fdiv 1.0, x carrying !fpmath !0 under attribute group #1: a single
; v_rcp_f32 is expected, and its result register must reach the store without
; being mentioned in between.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src, !fpmath !0
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; Same fdiv-with-!fpmath pattern under attribute group #4: a single v_rcp_f32
; feeding the store is expected here as well.
; NOTE(review): the name says "safe" but group #4 sets "unsafe-fp-math"="true",
; while the "unsafe_" kernel below uses group #3 ("unsafe-fp-math"="false").
; Confirm the naming is intentional before relying on it.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src, !fpmath !0
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; fdiv 1.0, x without !fpmath under attribute group #3: a v_div_scale_f32 is
; expected in the output.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(x)) on f32 under attribute group #1: a v_rsq_f32 is expected.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(x)) on f32 under attribute group #2: a v_rsq_f32 is expected.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f64 rcp intrinsic under attribute group #1: a v_rcp_f64 reading an s[N:M]
; register pair is expected, with its result going straight to a
; buffer_store_dwordx2.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp intrinsic under attribute group #2: the same v_rcp_f64 / store
; sequence as rcp_f64 is expected.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; fdiv double 1.0, x under attribute group #1: a v_div_scale_f64 is expected.
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; fdiv double 1.0, x under attribute group #2: a v_rcp_f64 followed by six
; v_fma_f64 instructions is expected.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; rcp(sqrt(x)) on f64 under attribute group #1: no v_rsq_f64_e32 may appear
; before the v_sqrt_f64, which is then followed by a v_rcp_f64.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; rcp(sqrt(x)) on f64 under attribute group #2: the v_sqrt_f64 result is
; expected to feed a v_rcp_f64 whose result is then stored.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Attribute groups:
;   #0 - intrinsic declarations.
;   #1 - unsafe-fp-math off, f32 denormal mode preserve-sign.
;   #2 - unsafe-fp-math on,  f32 denormal mode preserve-sign.
;   #3 - unsafe-fp-math off, f32 denormal mode ieee.
;   #4 - unsafe-fp-math on,  f32 denormal mode ieee.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }

; Operand of the !fpmath annotations used on the f32 fdiv tests above.
!0 = !{float 2.500000e+00}