; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -check-prefix=VI %s

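; There is no f64 variant of the legacy min instruction in the GCN ISA
; (v_min_legacy_f32 is f32-only), so each fcmp + select pair below is
; expected to lower to a 64-bit VALU compare feeding two v_cndmask_b32
; instructions, one per 32-bit half of the double.

; (%a uge %b) ? %b : %a: the unordered compare is emitted as
; v_cmp_nlt_f64 ("not less than"), the hardware form of uge.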
define amdgpu_kernel void @test_fmin_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_uge_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_uge_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp uge double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

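; (%a ugt %b) ? %b : %a: ugt is emitted as v_cmp_nle_f64
; ("not less than or equal").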
define amdgpu_kernel void @test_fmin_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ugt_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ugt_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ugt double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

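; (%a ule %b) ? %a : %b: ule is emitted as v_cmp_ngt_f64, with the
; v_cndmask_b32 operands swapped so %a is kept when the compare is true.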
define amdgpu_kernel void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ule_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_ngt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ule_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_ngt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ule double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

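; (%a ult %b) ? %a : %b: ult is emitted as v_cmp_nge_f64.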
define amdgpu_kernel void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ult_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ult_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ult double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

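; (%a oge %b) ? %b : %a: the ordered compare maps directly to v_cmp_ge_f64.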
define amdgpu_kernel void @test_fmin_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_oge_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_oge_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp oge double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

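; (%a ogt %b) ? %b : %a: ogt maps directly to v_cmp_gt_f64.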
define amdgpu_kernel void @test_fmin_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ogt_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ogt_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ogt double %a, %b
  %val = select i1 %cmp, double %b, double %a
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

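; (%a ole %b) ? %a : %b: ole maps directly to v_cmp_le_f64.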
define amdgpu_kernel void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_ole_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_le_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_ole_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_le_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp ole double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

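; (%a olt %b) ? %a : %b: olt maps directly to v_cmp_lt_f64.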
define amdgpu_kernel void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
; SI-LABEL: test_fmin_legacy_olt_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_fmin_legacy_olt_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1

  %a = load double, double addrspace(1)* %gep.0, align 8
  %b = load double, double addrspace(1)* %gep.1, align 8

  %cmp = fcmp olt double %a, %b
  %val = select i1 %cmp, double %a, double %b
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }