; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC,SI
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC,VI
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=EG -check-prefix=FUNC

; Intrinsic called by the fabs variant of the float-to-i1 conversion test.
declare float @llvm.fabs.f32(float) #1

; Scalar fptoui from float to i32.  The EG run expects one FLT_TO_UINT; the
; GCN runs expect v_cvt_u32_f32_e32 followed by s_endpgm.
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}

; GCN: v_cvt_u32_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_f32_to_i32(i32 addrspace(1)* %out, float %in) {
  %conv = fptoui float %in to i32
  store i32 %conv, i32 addrspace(1)* %out
  ret void
}

; fptoui on a <2 x float> kernel argument.  One conversion instruction is
; expected per element on every target (two FLT_TO_UINT on the EG run, two
; v_cvt_u32_f32_e32 on the GCN runs).
; FUNC-LABEL: {{^}}fp_to_uint_v2f32_to_v2i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32
define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
  %result = fptoui <2 x float> %in to <2 x i32>
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; fptoui on a <4 x float> value loaded from global memory (addrspace 1).
; Four conversion instructions are expected on every target, one per element.
; FUNC-LABEL: {{^}}fp_to_uint_v4f32_to_v4i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32

define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %value = load <4 x float>, <4 x float> addrspace(1)* %in
  %result = fptoui <4 x float> %value to <4 x i32>
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; Scalar fptoui from float to i64.  The EG run expects the conversion to be
; expanded into integer and/shift/sub/compare/select operations; the GCN runs
; only check that the kernel is emitted through to s_endpgm.
; The function name is matched with a label directive, like the other tests in
; this file, so the checks below cannot match output of a different function.
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) {
  %conv = fptoui float %x to i64
  store i64 %conv, i64 addrspace(1)* %out
  ret void
}

; fptoui from <2 x float> to <2 x i64>.  The EG checks repeat the expanded
; integer sequence once per element (two groups); the GCN runs only check that
; the kernel is emitted through to s_endpgm.
; The function name is matched with a label directive, like the other tests in
; this file, so the checks below cannot match output of a different function.
; FUNC-LABEL: {{^}}fp_to_uint_v2f32_to_v2i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
  %conv = fptoui <2 x float> %x to <2 x i64>
  store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
  ret void
}

; fptoui from <4 x float> to <4 x i64>.  The EG checks repeat the expanded
; integer sequence once per element (four groups); the GCN runs only check
; that the kernel is emitted through to s_endpgm.
; The function name is matched with a label directive, like the other tests in
; this file, so the checks below cannot match output of a different function.
; FUNC-LABEL: {{^}}fp_to_uint_v4f32_to_v4i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
  %conv = fptoui <4 x float> %x to <4 x i64>
  store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
  ret void
}


; fptoui from float to i1.  Both targets are expected to emit a floating-point
; equality compare against 1.0 rather than a convert instruction:
; v_cmp_eq_f32_e64 on the GCN runs and SETE_DX10 on the EG run.
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1:
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}

; EG: AND_INT
; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0,
define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
  %conv = fptoui float %in to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

; Same float-to-i1 conversion, but applied to the result of llvm.fabs.f32.
; The GCN check expects the compare's scalar operand to be printed as |sN|,
; i.e. the absolute value shows up on the compare operand itself.  There are
; no checks for the EG run in this test.
; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1:
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}|
define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptoui float %in.fabs to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

; fptoui from float to i16, stored with a 16-bit buffer store of the
; converted value.
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i16:
; Different instructions are expected on SI and VI: on SI fp_to_uint is
; legalized by the type legalizer, on VI it is legalized by the DAG legalizer,
; and the two legalize fp_to_uint differently.
; SI: v_cvt_u32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; VI: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; GCN: buffer_store_short [[VAL]]
define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 {
  %uint = fptoui float %in to i16
  store i16 %uint, i16 addrspace(1)* %out
  ret void
}

; #0 is attached to the i1 and i16 conversion kernels; #1 to the
; llvm.fabs.f32 declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
