; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s

; Pixel-shader entry whose pointer and value operands are both `inreg`.
; The expected output copies them from s2/s3 into v0/v1 before issuing
; ds_min_rtn_f32, and waits on lgkmcnt because the result is returned.
; Only the tonga run additionally expects m0 to be set to -1 beforehand.
define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ds_fmin_f32_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: ds_fmin_f32_ss:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Same as the plain `inreg` case, but the address is %ptr advanced by
; 128 floats. The expected output carries that displacement as the
; immediate `offset:512` (128 * 4 bytes) on the DS instruction, so no
; separate address add is expected.
define amdgpu_ps float @ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss_offset:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ds_fmin_f32_ss_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: ds_fmin_f32_ss_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s3
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Pixel-shader entry with `inreg` operands where the intrinsic result is
; never used. The expected output still uses the returning opcode
; ds_min_rtn_f32, and the program ends with s_endpgm without waiting on
; the DS result.
define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: ds_fmin_f32_ss_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: ds_fmin_f32_ss_nortn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT:    s_endpgm
  %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result variant with `inreg` operands and an address advanced by
; 128 floats. The expected output carries the displacement as
; `offset:512` and still uses the returning opcode ds_min_rtn_f32.
define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss_offset_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: ds_fmin_f32_ss_offset_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: ds_fmin_f32_ss_offset_nortn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s3
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Default calling convention, operands not marked `inreg`. The expected
; output uses v0/v1 directly with no register copies, waits on all
; counters at entry, and returns through s_setpc_b64. The gfx1010 run
; also expects an s_waitcnt_vscnt at entry.
define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Default calling convention with the address advanced by 128 floats.
; The expected output carries the displacement as `offset:512`
; (128 * 4 bytes) on the DS instruction and uses v0/v1 directly.
define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_offset:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Default calling convention where the intrinsic result is never used.
; The expected output still uses the returning opcode ds_min_rtn_f32 and
; waits on lgkmcnt before returning through s_setpc_b64.
; The dead result is named %unused, matching the `ss_nortn` tests.
define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_nortn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Default calling convention, unused result, address advanced by 128
; floats. The expected output carries the displacement as `offset:512`
; and still uses the returning opcode ds_min_rtn_f32.
; The dead result is named %unused, matching the `ss_nortn` tests.
define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Default calling convention with the intrinsic's trailing i1 immediate
; set to true (the other tests in this file pass false). The expected
; output is instruction-for-instruction the same as for the plain
; ds_fmin_f32_vv case.
define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_volatile:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_volatile:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_volatile:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
  ret float %ret
}

; Intrinsic under test. Operands: an addrspace(3) pointer, the float
; operand, then three immediates (two i32 and one i1). The immediates are
; presumably ordering, scope and a volatile flag, going by the
; `_volatile` test name and LLVM's AMDGPU intrinsic definitions; confirm
; against the LLVM revision in use.
declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0

attributes #0 = { argmemonly nounwind willreturn }
