; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,GFX8_9,GFX8_9_10,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8_9,GFX9_10,GFX8_9_10,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GFX9_10,GFX8_9_10,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck --check-prefixes=EG,FUNC %s
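
; Check that (select (icmp slt/sle/ult/ule a, b), a, b) patterns are selected
; to the scalar/vector min instructions (s_min_*, v_min_*, v_pk_min_*) on the
; AMDGCN targets and to MIN_INT/MIN_UINT on R600.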

; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; GCN: v_min_i32_e32

; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp sle <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; GCN: s_min_i32
; GCN: s_min_i32
; GCN: s_min_i32
; GCN: s_min_i32

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
  %cmp = icmp sle <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; GCN: s_load_dword
; GCN: s_load_dword
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: s_min_i32
define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32], i8 %a, [8 x i32], i8 %b) #0 {
  %cmp = icmp sle i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; FIXME: Why vector and sdwa for last element?
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; GCN-DAG: s_load_dwordx2
; GCN-DAG: s_load_dword s
; GCN-DAG: s_load_dword s
; GCN-NOT: _load_

; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32

; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32
; VI: v_min_i32_sdwa

; GFX9_10: v_min_i16
; GFX9_10: v_min_i16
; GFX9_10: v_min_i16
; GFX9_10: v_min_i16

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8 x i32], <4 x i8> %a, [8 x i32], <4 x i8> %b) #0 {
  %cmp = icmp sle <4 x i8> %a, %b
  %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
; GCN: s_load_dword s
; GCN: s_load_dword s

; SI: s_ashr_i32
; SI: s_ashr_i32
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: s_min_i32
; SI: s_min_i32

; VI: s_sext_i32_i16
; VI: s_sext_i32_i16
; VI: s_min_i32
; VI: s_min_i32

; GFX9_10: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
; SI-NOT: buffer_load
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32

; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32

; GFX9_10: v_pk_min_i16
; GFX9_10: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
  %cmp = icmp sle <4 x i16> %a, %b
  %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %val, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @v_test_imin_slt_i32
; GCN: v_min_i32_e32

; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: @v_test_imin_slt_i16
; SI: v_min_i32_e32

; GFX8_9: v_min_i16_e32
; GFX10:  v_min_i16

; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid

  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %cmp = icmp slt i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: @s_test_imin_slt_i32
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; GCN: s_min_i32
; GCN: s_min_i32

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %cmp = icmp slt <2 x i32> %a, %b
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8

; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp slt i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8

; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp sle i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ule_i32
; GCN: v_min_u32_e32

; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ule_v3i32
; GCN: v_min_u32_e32
; GCN: v_min_u32_e32
; GCN: v_min_u32_e32
; GCN-NOT: v_min_u32_e32
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid

  %a = load <3 x i32>, <3 x i32> addrspace(1)* %a.gep
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out.gep
  ret void
}

; FIXME: Reduce unused packed component to scalar
; FUNC-LABEL: @v_test_umin_ule_v3i16{{$}}
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI-NOT: v_min_u32_e32

; VI: v_min_u16_e32
; VI: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_min_u16_e32
; VI-NOT: v_min_u16

; GFX9_10: v_pk_min_u16
; GFX9_10: v_pk_min_u16

; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid

  %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.gep
  %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i16> %a, %b
  %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store <3 x i16> %val, <3 x i16> addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: @s_test_umin_ule_i32
; GCN: s_min_u32

; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i32
; GCN: v_min_u32_e32

; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
; SI: {{buffer|flat|global}}_load_ubyte
; SI: {{buffer|flat|global}}_load_ubyte
; SI: v_min_u32_e32

; GFX8_9_10: {{flat|global}}_load_ubyte
; GFX8_9_10: {{flat|global}}_load_ubyte
; GFX8_9:    v_min_u16_e32
; GFX10:     v_min_u16

; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid

  %a = load i8, i8 addrspace(1)* %a.gep, align 1
  %b = load i8, i8 addrspace(1)* %b.gep, align 1
  %cmp = icmp ult i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out.gep, align 1
  ret void
}

; FUNC-LABEL: @s_test_umin_ult_i32
; GCN: s_min_u32

; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
; SI-NOT: v_min
; GCN: v_cmp_lt_u32
; SI-NOT: v_min
; SI: v_cndmask_b32
; SI-NOT: v_min
; GCN: s_endpgm

; EG-NOT: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
; GCN-NOT: v_min
; GCN: v_cmp_lt_u32
; GCN: v_cndmask_b32
; GCN-NOT: v_min
; GCN: s_endpgm

; EG-NOT: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %a = load i16, i16 addrspace(1)* %aptr, align 2
  %b = load i16, i16 addrspace(1)* %bptr, align 2
  %cmp = icmp ult i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out0, align 2
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}


; FUNC-LABEL: @s_test_umin_ult_v1i32
; GCN: s_min_u32

; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp ult <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
  %cmp = icmp ult <8 x i32> %a, %b
  %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
; GCN-NOT: {{buffer|flat|global}}_load
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32

; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
  %cmp = icmp ult <8 x i16> %a, %b
  %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %val, <8 x i16> addrspace(1)* %out
  ret void
}

; Make sure the redundant and is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN: s_min_u32 [[MIN:s[0-9]+]], s{{[0-9]}}, s{{[0-9]}}
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], s{{[0-9]}}
; GCN: buffer_store_dword [[VMIN]]

; EG: MIN_UINT
define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) #0 {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ult i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure the redundant sign_extend_inreg is removed.

; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
; GCN-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]

; GCN: s_min_i32 [[MIN:s[0-9]+]], [[EXT_A]], [[EXT_B]]
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; GCN: buffer_store_dword [[VMIN]]

; EG: MIN_INT
define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) #0 {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp slt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
  %cmp = icmp sle i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; 64 bit
; FUNC-LABEL: {{^}}test_umin_ult_i64
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ult i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_umin_ule_i64
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ule i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imin_slt_i64
; GCN: s_endpgm

; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp slt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imin_sle_i64
; GCN: s_endpgm

; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp sle i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_test_imin_sle_v2i16:
; SI: v_min_i32
; SI: v_min_i32

; VI: v_min_i16
; VI: v_min_i16

; GFX9_10: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; FIXME: i16 min
; FUNC-LABEL: {{^}}v_test_imin_ule_v2i16:
; SI: v_min_u32
; SI: v_min_u32

; VI: v_min_u16
; VI: v_min_u16

; GFX9_10: v_pk_min_u16

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }