1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
3
4declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
5declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
6declare float @llvm.fabs.f32(float) #0
7
8declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
9declare half @llvm.fabs.f16(half) #0
10
11; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
12; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
13define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
14  %temp = call float @llvm.fabs.f32(float %a)
15  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
16  store i64 %result, i64 addrspace(1)* %out
17  ret void
18}
19
20; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
21; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
22define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
23  %temp = call float @llvm.fabs.f32(float %a)
24  %src_input = call float @llvm.fabs.f32(float %src)
25  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
26  store i64 %result, i64 addrspace(1)* %out
27  ret void
28}
29
30; GCN-LABEL: {{^}}v_fcmp_f32:
31; GCN-NOT: v_cmp_eq_f32_e64
32define amdgpu_kernel void @v_fcmp_f32(i64 addrspace(1)* %out, float %src) {
33  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
34  store i64 %result, i64 addrspace(1)* %out
35  ret void
36}
37
38; GCN-LABEL: {{^}}v_fcmp_f32_oeq:
39; GCN: v_cmp_eq_f32_e64
40define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
41  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
42  store i64 %result, i64 addrspace(1)* %out
43  ret void
44}
45
46; GCN-LABEL: {{^}}v_fcmp_f32_one:
47; GCN: v_cmp_neq_f32_e64
48define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
49  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
50  store i64 %result, i64 addrspace(1)* %out
51  ret void
52}
53
54; GCN-LABEL: {{^}}v_fcmp_f32_ogt:
55; GCN: v_cmp_gt_f32_e64
56define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
57  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
58  store i64 %result, i64 addrspace(1)* %out
59  ret void
60}
61
62; GCN-LABEL: {{^}}v_fcmp_f32_oge:
63; GCN: v_cmp_ge_f32_e64
64define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
65  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
66  store i64 %result, i64 addrspace(1)* %out
67  ret void
68}
69
70; GCN-LABEL: {{^}}v_fcmp_f32_olt:
71; GCN: v_cmp_lt_f32_e64
72define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
73  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
74  store i64 %result, i64 addrspace(1)* %out
75  ret void
76}
77
78; GCN-LABEL: {{^}}v_fcmp_f32_ole:
79; GCN: v_cmp_le_f32_e64
80define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
81  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
82  store i64 %result, i64 addrspace(1)* %out
83  ret void
84}
85
86
87; GCN-LABEL: {{^}}v_fcmp_f32_ueq:
88; GCN: v_cmp_nlg_f32_e64
89define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
90  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
91  store i64 %result, i64 addrspace(1)* %out
92  ret void
93}
94
95; GCN-LABEL: {{^}}v_fcmp_f32_une:
96; GCN: v_cmp_neq_f32_e64
97define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
98  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
99  store i64 %result, i64 addrspace(1)* %out
100  ret void
101}
102
103; GCN-LABEL: {{^}}v_fcmp_f32_ugt:
104; GCN: v_cmp_nle_f32_e64
105define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
106  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
107  store i64 %result, i64 addrspace(1)* %out
108  ret void
109}
110
111; GCN-LABEL: {{^}}v_fcmp_f32_uge:
112; GCN: v_cmp_nlt_f32_e64
113define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
114  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
115  store i64 %result, i64 addrspace(1)* %out
116  ret void
117}
118
119; GCN-LABEL: {{^}}v_fcmp_f32_ult:
120; GCN: v_cmp_nge_f32_e64
121define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
122  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
123  store i64 %result, i64 addrspace(1)* %out
124  ret void
125}
126
127; GCN-LABEL: {{^}}v_fcmp_f32_ule:
128; GCN: v_cmp_ngt_f32_e64
129define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
130  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
131  store i64 %result, i64 addrspace(1)* %out
132  ret void
133}
134
135; GCN-LABEL: {{^}}v_fcmp_f64_oeq:
136; GCN: v_cmp_eq_f64_e64
137define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
138  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
139  store i64 %result, i64 addrspace(1)* %out
140  ret void
141}
142
143; GCN-LABEL: {{^}}v_fcmp_f64_one:
144; GCN: v_cmp_neq_f64_e64
145define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
146  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
147  store i64 %result, i64 addrspace(1)* %out
148  ret void
149}
150
151; GCN-LABEL: {{^}}v_fcmp_f64_ogt:
152; GCN: v_cmp_gt_f64_e64
153define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
154  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
155  store i64 %result, i64 addrspace(1)* %out
156  ret void
157}
158
159; GCN-LABEL: {{^}}v_fcmp_f64_oge:
160; GCN: v_cmp_ge_f64_e64
161define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
162  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
163  store i64 %result, i64 addrspace(1)* %out
164  ret void
165}
166
167; GCN-LABEL: {{^}}v_fcmp_f64_olt:
168; GCN: v_cmp_lt_f64_e64
169define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
170  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
171  store i64 %result, i64 addrspace(1)* %out
172  ret void
173}
174
175; GCN-LABEL: {{^}}v_fcmp_f64_ole:
176; GCN: v_cmp_le_f64_e64
177define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
178  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
179  store i64 %result, i64 addrspace(1)* %out
180  ret void
181}
182
183; GCN-LABEL: {{^}}v_fcmp_f64_ueq:
184; GCN: v_cmp_nlg_f64_e64
185define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
186  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
187  store i64 %result, i64 addrspace(1)* %out
188  ret void
189}
190
191; GCN-LABEL: {{^}}v_fcmp_f64_une:
192; GCN: v_cmp_neq_f64_e64
193define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
194  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
195  store i64 %result, i64 addrspace(1)* %out
196  ret void
197}
198
199; GCN-LABEL: {{^}}v_fcmp_f64_ugt:
200; GCN: v_cmp_nle_f64_e64
201define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
202  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
203  store i64 %result, i64 addrspace(1)* %out
204  ret void
205}
206
207; GCN-LABEL: {{^}}v_fcmp_f64_uge:
208; GCN: v_cmp_nlt_f64_e64
209define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
210  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
211  store i64 %result, i64 addrspace(1)* %out
212  ret void
213}
214
215; GCN-LABEL: {{^}}v_fcmp_f64_ult:
216; GCN: v_cmp_nge_f64_e64
217define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
218  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
219  store i64 %result, i64 addrspace(1)* %out
220  ret void
221}
222
223; GCN-LABEL: {{^}}v_fcmp_f64_ule:
224; GCN: v_cmp_ngt_f64_e64
225define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
226  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
227  store i64 %result, i64 addrspace(1)* %out
228  ret void
229}
230
231; GCN-LABEL: {{^}}v_fcmp_f16_oeq_with_fabs:
232; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
233
234; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
235; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
236; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
237define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
238  %temp = call half @llvm.fabs.f16(half %a)
239  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1)
240  store i64 %result, i64 addrspace(1)* %out
241  ret void
242}
243
244; GCN-LABEL: {{^}}v_fcmp_f16_oeq_both_operands_with_fabs:
245; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
246
247; SI: v_cvt_f32_f16_e64 [[CVT0:v[0-9]+]], |s{{[0-9]+}}|
248; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
249; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
250define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
251  %temp = call half @llvm.fabs.f16(half %a)
252  %src_input = call half @llvm.fabs.f16(half %src)
253  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1)
254  store i64 %result, i64 addrspace(1)* %out
255  ret void
256}
257
258; GCN-LABEL: {{^}}v_fcmp_f16:
259; GCN-NOT: v_cmp_eq_
260define amdgpu_kernel void @v_fcmp_f16(i64 addrspace(1)* %out, half %src) {
261  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1)
262  store i64 %result, i64 addrspace(1)* %out
263  ret void
264}
265
266; GCN-LABEL: {{^}}v_fcmp_f16_oeq:
267; VI: v_cmp_eq_f16_e64
268
269; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
270; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
271; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
272define amdgpu_kernel void @v_fcmp_f16_oeq(i64 addrspace(1)* %out, half %src) {
273  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1)
274  store i64 %result, i64 addrspace(1)* %out
275  ret void
276}
277
278; GCN-LABEL: {{^}}v_fcmp_f16_one:
279; VI: v_cmp_neq_f16_e64
280
281; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
282; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
283; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
284define amdgpu_kernel void @v_fcmp_f16_one(i64 addrspace(1)* %out, half %src) {
285  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6)
286  store i64 %result, i64 addrspace(1)* %out
287  ret void
288}
289
290; GCN-LABEL: {{^}}v_fcmp_f16_ogt:
291; VI: v_cmp_gt_f16_e64
292
293; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
294; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
295; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
296define amdgpu_kernel void @v_fcmp_f16_ogt(i64 addrspace(1)* %out, half %src) {
297  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2)
298  store i64 %result, i64 addrspace(1)* %out
299  ret void
300}
301
302; GCN-LABEL: {{^}}v_fcmp_f16_oge:
303; VI: v_cmp_ge_f16_e64
304
305; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
306; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
307; SI: v_cmp_le_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
308define amdgpu_kernel void @v_fcmp_f16_oge(i64 addrspace(1)* %out, half %src) {
309  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3)
310  store i64 %result, i64 addrspace(1)* %out
311  ret void
312}
313
314; GCN-LABEL: {{^}}v_fcmp_f16_olt:
315; VI: v_cmp_lt_f16_e64
316
317; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
318; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
319; SI: v_cmp_gt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
320define amdgpu_kernel void @v_fcmp_f16_olt(i64 addrspace(1)* %out, half %src) {
321  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4)
322  store i64 %result, i64 addrspace(1)* %out
323  ret void
324}
325
326; GCN-LABEL: {{^}}v_fcmp_f16_ole:
327; VI: v_cmp_le_f16_e64
328
329; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
330; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
331; SI: v_cmp_ge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
332define amdgpu_kernel void @v_fcmp_f16_ole(i64 addrspace(1)* %out, half %src) {
333  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5)
334  store i64 %result, i64 addrspace(1)* %out
335  ret void
336}
337
338; GCN-LABEL: {{^}}v_fcmp_f16_ueq:
339; VI: v_cmp_nlg_f16_e64
340
341; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
342; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
343; SI: v_cmp_nlg_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
344define amdgpu_kernel void @v_fcmp_f16_ueq(i64 addrspace(1)* %out, half %src) {
345  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9)
346  store i64 %result, i64 addrspace(1)* %out
347  ret void
348}
349
350; GCN-LABEL: {{^}}v_fcmp_f16_une:
351; VI: v_cmp_neq_f16_e64
352
353; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
354; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
355; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
356define amdgpu_kernel void @v_fcmp_f16_une(i64 addrspace(1)* %out, half %src) {
357  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
358  store i64 %result, i64 addrspace(1)* %out
359  ret void
360}
361
362; GCN-LABEL: {{^}}v_fcmp_f16_ugt:
363; VI: v_cmp_nle_f16_e64
364
365; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
366; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
367; SI: v_cmp_nge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
368define amdgpu_kernel void @v_fcmp_f16_ugt(i64 addrspace(1)* %out, half %src) {
369  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
370  store i64 %result, i64 addrspace(1)* %out
371  ret void
372}
373
374; GCN-LABEL: {{^}}v_fcmp_f16_uge:
375; VI: v_cmp_nlt_f16_e64
376
377; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
378; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
379; SI: v_cmp_ngt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
380define amdgpu_kernel void @v_fcmp_f16_uge(i64 addrspace(1)* %out, half %src) {
381  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
382  store i64 %result, i64 addrspace(1)* %out
383  ret void
384}
385
386; GCN-LABEL: {{^}}v_fcmp_f16_ult:
387; VI: v_cmp_nge_f16_e64
388
389; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
390; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
391; SI: v_cmp_nle_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
392define amdgpu_kernel void @v_fcmp_f16_ult(i64 addrspace(1)* %out, half %src) {
393  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
394  store i64 %result, i64 addrspace(1)* %out
395  ret void
396}
397
398; GCN-LABEL: {{^}}v_fcmp_f16_ule:
399; VI: v_cmp_ngt_f16_e64
400
401; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
402; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
403; SI: v_cmp_nlt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
404define amdgpu_kernel void @v_fcmp_f16_ule(i64 addrspace(1)* %out, half %src) {
405  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
406  store i64 %result, i64 addrspace(1)* %out
407  ret void
408}
409
410attributes #0 = { nounwind readnone convergent }
411