1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
5
; Baseline reciprocal pattern: a plain `fdiv float 1.0, %src` with no
; fast-math flags and no !fpmath metadata, in a kernel using attribute group #0.
; The GCN expectations are a scalar load of the operand, one v_rcp_f32_e32
; applied to it, and a buffer store of that result. The EG expectation is a
; RECIP_IEEE instruction.
6; FUNC-LABEL: {{^}}rcp_pat_f32:
7; GCN: s_load_dword [[SRC:s[0-9]+]]
8; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
9; GCN: buffer_store_dword [[RCP]]
10
11; EG: RECIP_IEEE
12define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
; 1.0 / %src is the expression expected to become a single reciprocal.
13  %rcp = fdiv float 1.0, %src
14  store float %rcp, float addrspace(1)* %out, align 4
15  ret void
16}
17
; Same 1.0 / %src pattern as a plain fdiv, but the division carries
; `!fpmath !0` (the float 2.5 node defined at the end of the file).
; Expected output is unchanged from the baseline shape: scalar load,
; v_rcp_f32_e32, buffer store on GCN; RECIP_IEEE on EG.
18; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
19; GCN: s_load_dword [[SRC:s[0-9]+]]
20; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
21; GCN: buffer_store_dword [[RCP]]
22
23; EG: RECIP_IEEE
24define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
; The only difference from an unannotated fdiv is the !fpmath attachment.
25  %rcp = fdiv float 1.0, %src, !fpmath !0
26  store float %rcp, float addrspace(1)* %out, align 4
27  ret void
28}
29
; 1.0 / %src where the fdiv has both the `fast` flag and `!fpmath !0`.
; The expectations are the same as for the unflagged variants: scalar load,
; a single v_rcp_f32_e32, and a buffer store on GCN; RECIP_IEEE on EG.
30; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
31; GCN: s_load_dword [[SRC:s[0-9]+]]
32; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
33; GCN: buffer_store_dword [[RCP]]
34
35; EG: RECIP_IEEE
36define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
; Instruction-level `fast` flag combined with the !fpmath attachment.
37  %rcp = fdiv fast float 1.0, %src, !fpmath !0
38  store float %rcp, float addrspace(1)* %out, align 4
39  ret void
40}
41
; 1.0 / %src where the fdiv has the `arcp` flag plus `!fpmath !0`.
; Expected on GCN is a scalar load, a single v_rcp_f32_e32, and a buffer
; store of the result; expected on EG is RECIP_IEEE.
42; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
43; GCN: s_load_dword [[SRC:s[0-9]+]]
44; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
45; GCN: buffer_store_dword [[RCP]]
46
47; EG: RECIP_IEEE
48define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
; Only the allow-reciprocal flag is set here, not the full `fast` set.
49  %rcp = fdiv arcp float 1.0, %src, !fpmath !0
50  store float %rcp, float addrspace(1)* %out, align 4
51  ret void
52}
53
; 1.0 / %src with `!fpmath !0` and no per-instruction fast-math flags.
; Unlike the other kernels in this file, this one uses attribute group #2,
; which sets "unsafe-fp-math"="true" at function level.
; Expected output is the same reciprocal shape: scalar load, v_rcp_f32_e32,
; buffer store on GCN; RECIP_IEEE on EG.
54; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
55; GCN: s_load_dword [[SRC:s[0-9]+]]
56; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
57; GCN: buffer_store_dword [[RCP]]
58
59; EG: RECIP_IEEE
60define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
; No flags on the instruction itself; relaxed math comes from the #2 attribute.
61  %rcp = fdiv float 1.0, %src, !fpmath !0
62  store float %rcp, float addrspace(1)* %out, align 4
63  ret void
64}
65
; Reciprocal of an absolute value: 1.0 / fabs(%src).
; The GCN expectation is that the fabs appears as the |...| modifier on the
; operand of a v_rcp_f32_e64 (note the _e64 form, not _e32), followed by a
; buffer store of the result. The EG expectation is RECIP_IEEE.
66; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
67; GCN: s_load_dword [[SRC:s[0-9]+]]
68; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]|
69; GCN: buffer_store_dword [[RCP]]
70
71; EG: RECIP_IEEE
72define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
73  %src.fabs = call float @llvm.fabs.f32(float %src)
; The divisor is the fabs result, not %src directly.
74  %rcp = fdiv float 1.0, %src.fabs
75  store float %rcp, float addrspace(1)* %out, align 4
76  ret void
77}
78
; Negated reciprocal: -1.0 / %src.
; The GCN expectation is a v_rcp_f32_e64 whose operand carries a negation
; modifier (-[[SRC]]), i.e. the sign of the numerator is expected to be moved
; onto the reciprocal's input. The EG expectation is RECIP_IEEE.
79; FUNC-LABEL: {{^}}neg_rcp_pat_f32:
80; GCN: s_load_dword [[SRC:s[0-9]+]]
81; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[SRC]]
82; GCN: buffer_store_dword [[RCP]]
83
84; EG: RECIP_IEEE
85define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
; Numerator is the constant -1.0 rather than 1.0.
86  %rcp = fdiv float -1.0, %src
87  store float %rcp, float addrspace(1)* %out, align 4
88  ret void
89}
90
; Reciprocal of a negated absolute value: 1.0 / -(fabs(%src)).
; The GCN expectation is that both operations appear as the combined -|...|
; modifier on the operand of a single v_rcp_f32_e64.
; This function has no EG expectations, so the r600 runs only match its label.
91; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
92; GCN: s_load_dword [[SRC:s[0-9]+]]
93; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
94; GCN: buffer_store_dword [[RCP]]
95define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
96  %src.fabs = call float @llvm.fabs.f32(float %src)
; Negation is written as a subtraction from -0.0.
97  %src.fabs.fneg = fsub float -0.0, %src.fabs
98  %rcp = fdiv float 1.0, %src.fabs.fneg
99  store float %rcp, float addrspace(1)* %out, align 4
100  ret void
101}
102
; Like the single-use -(fabs(%src)) reciprocal, but the negated absolute value
; has a second user: it is also multiplied by %src.
; The GCN expectation is that the -|...| modifier is applied independently on
; both the v_rcp_f32_e64 and the v_mul_f32_e64 operands, and that both results
; are stored.
; This function has no EG expectations, so the r600 runs only match its label.
103; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_multi_use_f32:
104; GCN: s_load_dword [[SRC:s[0-9]+]]
105; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
106; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -|[[SRC]]|
107; GCN: buffer_store_dword [[RCP]]
108; GCN: buffer_store_dword [[MUL]]
109define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
110  %src.fabs = call float @llvm.fabs.f32(float %src)
; Negation is written as a subtraction from -0.0.
111  %src.fabs.fneg = fsub float -0.0, %src.fabs
112  %rcp = fdiv float 1.0, %src.fabs.fneg
; Both stores target the same %out address and are marked volatile --
; presumably so that neither is removed as a dead store (NOTE(review): confirm).
113  store volatile float %rcp, float addrspace(1)* %out, align 4
114
; Second use of the negated fabs value.
115  %other = fmul float %src, %src.fabs.fneg
116  store volatile float %other, float addrspace(1)* %out, align 4
117  ret void
118}
119
; Division by the constant 2.0 with the `arcp` flag: %x / 2.0.
; The GCN expectation is a v_mul_f32_e32 by the inline constant 0.5 instead
; of a division, followed by a buffer store of the product.
; This function has no EG expectations, so the r600 runs only match its label.
120; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
121; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
122; GCN: buffer_store_dword [[MUL]]
123define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
; The dividend is loaded from an undef address; the check only requires that
; it ends up in some VGPR.
124  %x = load float, float addrspace(1)* undef
125  %rcp = fdiv arcp float %x, 2.0
126  store float %rcp, float addrspace(1)* %out, align 4
127  ret void
128}
129
; Division by the constant 10.0 with the `arcp` flag: %x / 10.0.
; The GCN expectation is a v_mul_f32_e32 by the literal 0x3dcccccd, which is
; the IEEE-754 single-precision bit pattern for 0.1, followed by a buffer
; store of the product.
; This function has no EG expectations, so the r600 runs only match its label.
130; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:
131; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}
132; GCN: buffer_store_dword [[MUL]]
133define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
; The dividend is loaded from an undef address; the check only requires that
; it ends up in some VGPR.
134  %x = load float, float addrspace(1)* undef
135  %rcp = fdiv arcp float %x, 10.0
136  store float %rcp, float addrspace(1)* %out, align 4
137  ret void
138}
139
; Division by the constant -10.0 with the `arcp` flag: %x / -10.0.
; The GCN expectation is a v_mul_f32_e32 by the literal 0xbdcccccd, which is
; the IEEE-754 single-precision bit pattern for -0.1 (the sign is kept in the
; constant), followed by a buffer store of the product.
; This function has no EG expectations, so the r600 runs only match its label.
140; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:
141; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}
142; GCN: buffer_store_dword [[MUL]]
143define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
; The dividend is loaded from an undef address; the check only requires that
; it ends up in some VGPR.
144  %x = load float, float addrspace(1)* undef
145  %rcp = fdiv arcp float %x, -10.0
146  store float %rcp, float addrspace(1)* %out, align 4
147  ret void
148}
149
; Intrinsic declarations. llvm.fabs.f32 is called by the rcp_fabs_* kernels.
; llvm.sqrt.f32 is not referenced by any function visible in this part of the
; file (NOTE(review): confirm whether it is still needed before removing).
150declare float @llvm.fabs.f32(float) #1
151declare float @llvm.sqrt.f32(float) #1
152
; Attribute groups:
;   #0 - "unsafe-fp-math"="false"; used by every kernel here except
;        rcp_global_fast_ulp25_pat_f32.
;   #1 - nounwind readnone; applied to the intrinsic declarations.
;   #2 - "unsafe-fp-math"="true"; used only by rcp_global_fast_ulp25_pat_f32.
153attributes #0 = { nounwind "unsafe-fp-math"="false" }
154attributes #1 = { nounwind readnone }
155attributes #2 = { nounwind "unsafe-fp-math"="true" }
156
; Operand for the !fpmath attachments on the *_ulp25_* divisions. Per the LLVM
; Language Reference, this value is the permitted error in ULPs (2.5 here).
157!0 = !{float 2.500000e+00}
158