1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
5
6; Make sure the memory operand information is preserved.
7; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-MIR %s
8; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s
9
10
; Pointer and value are both passed inreg (arriving in $sgpr2/$sgpr3). The
; checks expect each to be copied into a VGPR before the DS max, and the
; returned value to come back in v0. GFX8 additionally initializes m0 to -1
; ahead of the DS instruction; the GFX9 output has no m0 write.
11define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) {
12; GFX8-LABEL: ds_fmax_f32_ss:
13; GFX8:       ; %bb.0:
14; GFX8-NEXT:    v_mov_b32_e32 v0, s2
15; GFX8-NEXT:    v_mov_b32_e32 v1, s3
16; GFX8-NEXT:    s_mov_b32 m0, -1
17; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
18; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
19; GFX8-NEXT:    ; return to shader part epilog
20;
21; GFX9-LABEL: ds_fmax_f32_ss:
22; GFX9:       ; %bb.0:
23; GFX9-NEXT:    v_mov_b32_e32 v0, s2
24; GFX9-NEXT:    v_mov_b32_e32 v1, s3
25; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
26; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
27; GFX9-NEXT:    ; return to shader part epilog
28  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss
29  ; GFX8-MIR: bb.1 (%ir-block.0):
30  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
31  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
32  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
33  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
34  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
35  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
36  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
37  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
38  ; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
39  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss
40  ; GFX9-MIR: bb.1 (%ir-block.0):
41  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
42  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
43  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
44  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
45  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
46  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
47  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
48  ; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
49  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
50  ret float %ret
51}
52
; inreg (SGPR) pointer and value, with the pointer advanced by a GEP of 128
; floats. The checks expect that displacement to appear as the instruction's
; immediate offset (offset:512 in the ISA, operand 512 in the MIR) rather
; than as a separate add, and the memory operand to reference %ir.gep.
53define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) {
54; GFX8-LABEL: ds_fmax_f32_ss_offset:
55; GFX8:       ; %bb.0:
56; GFX8-NEXT:    v_mov_b32_e32 v0, s3
57; GFX8-NEXT:    v_mov_b32_e32 v1, s2
58; GFX8-NEXT:    s_mov_b32 m0, -1
59; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
60; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
61; GFX8-NEXT:    ; return to shader part epilog
62;
63; GFX9-LABEL: ds_fmax_f32_ss_offset:
64; GFX9:       ; %bb.0:
65; GFX9-NEXT:    v_mov_b32_e32 v0, s3
66; GFX9-NEXT:    v_mov_b32_e32 v1, s2
67; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
68; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
69; GFX9-NEXT:    ; return to shader part epilog
70  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset
71  ; GFX8-MIR: bb.1 (%ir-block.0):
72  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
73  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
74  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
75  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
76  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
77  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
78  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
79  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
80  ; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
81  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset
82  ; GFX9-MIR: bb.1 (%ir-block.0):
83  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
84  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
85  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
86  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
87  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
88  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
89  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
90  ; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
91  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
92  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
93  ret float %ret
94}
95
; inreg (SGPR) pointer and value; the intrinsic's result is not used and the
; function returns void.
; NOTE(review): the checks below still expect the returning form
; (ds_max_rtn_f32 / DS_MAX_RTN_F32) even though the result is dead; they
; record current output, so regenerate them if a no-return selection is added.
96define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
97; GFX8-LABEL: ds_fmax_f32_ss_nortn:
98; GFX8:       ; %bb.0:
99; GFX8-NEXT:    v_mov_b32_e32 v0, s2
100; GFX8-NEXT:    v_mov_b32_e32 v1, s3
101; GFX8-NEXT:    s_mov_b32 m0, -1
102; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
103; GFX8-NEXT:    s_endpgm
104;
105; GFX9-LABEL: ds_fmax_f32_ss_nortn:
106; GFX9:       ; %bb.0:
107; GFX9-NEXT:    v_mov_b32_e32 v0, s2
108; GFX9-NEXT:    v_mov_b32_e32 v1, s3
109; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
110; GFX9-NEXT:    s_endpgm
111  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn
112  ; GFX8-MIR: bb.1 (%ir-block.0):
113  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
114  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
115  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
116  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
117  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
118  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
119  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
120  ; GFX8-MIR:   S_ENDPGM 0
121  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn
122  ; GFX9-MIR: bb.1 (%ir-block.0):
123  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
124  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
125  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
126  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
127  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
128  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
129  ; GFX9-MIR:   S_ENDPGM 0
130  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
131  ret void
132}
133
; inreg (SGPR) pointer and value, pointer advanced by a GEP of 128 floats,
; intrinsic result unused. The checks expect the displacement as the
; immediate offset (512) and, as currently generated, the returning form of
; the DS max instruction.
134define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
135; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn:
136; GFX8:       ; %bb.0:
137; GFX8-NEXT:    v_mov_b32_e32 v0, s3
138; GFX8-NEXT:    v_mov_b32_e32 v1, s2
139; GFX8-NEXT:    s_mov_b32 m0, -1
140; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
141; GFX8-NEXT:    s_endpgm
142;
143; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn:
144; GFX9:       ; %bb.0:
145; GFX9-NEXT:    v_mov_b32_e32 v0, s3
146; GFX9-NEXT:    v_mov_b32_e32 v1, s2
147; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
148; GFX9-NEXT:    s_endpgm
149  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
150  ; GFX8-MIR: bb.1 (%ir-block.0):
151  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
152  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
153  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
154  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
155  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
156  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
157  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
158  ; GFX8-MIR:   S_ENDPGM 0
159  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
160  ; GFX9-MIR: bb.1 (%ir-block.0):
161  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
162  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
163  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
164  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
165  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
166  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
167  ; GFX9-MIR:   S_ENDPGM 0
168  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
169  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
170  ret void
171}
172
; Pointer and value arrive in VGPRs ($vgpr0/$vgpr1), so the checks expect the
; DS max to consume them directly with no SGPR-to-VGPR copies. The result is
; returned in v0 and the function returns through s[30:31].
173define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) {
174; GFX8-LABEL: ds_fmax_f32_vv:
175; GFX8:       ; %bb.0:
176; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GFX8-NEXT:    s_mov_b32 m0, -1
178; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
179; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
180; GFX8-NEXT:    s_setpc_b64 s[30:31]
181;
182; GFX9-LABEL: ds_fmax_f32_vv:
183; GFX9:       ; %bb.0:
184; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
186; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
187; GFX9-NEXT:    s_setpc_b64 s[30:31]
188  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
189  ; GFX8-MIR: bb.1 (%ir-block.0):
190  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
191  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
192  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
193  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
194  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
195  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
196  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
197  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
198  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
199  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
200  ; GFX9-MIR: bb.1 (%ir-block.0):
201  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
202  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
203  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
204  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
205  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
206  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
207  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
208  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
209  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
210  ret float %ret
211}
212
; VGPR pointer and value, with the pointer advanced by a GEP of 128 floats.
; The checks expect the displacement as the instruction's immediate offset
; (offset:512 in the ISA, operand 512 in the MIR) and the memory operand to
; reference %ir.gep.
213define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
214; GFX8-LABEL: ds_fmax_f32_vv_offset:
215; GFX8:       ; %bb.0:
216; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX8-NEXT:    s_mov_b32 m0, -1
218; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
219; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
220; GFX8-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX9-LABEL: ds_fmax_f32_vv_offset:
223; GFX9:       ; %bb.0:
224; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
226; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
227; GFX9-NEXT:    s_setpc_b64 s[30:31]
228  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
229  ; GFX8-MIR: bb.1 (%ir-block.0):
230  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
231  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
232  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
233  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
234  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
235  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
236  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
237  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
238  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
239  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
240  ; GFX9-MIR: bb.1 (%ir-block.0):
241  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
242  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
243  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
244  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
245  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
246  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
247  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
248  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
249  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
250  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
251  ret float %ret
252}
253
; VGPR pointer and value; the intrinsic's result is not used and the function
; returns void. The discarded value is named %unused, matching the
; amdgpu_ps *_nortn tests in this file; the name does not appear in any
; check line, so the assertions are unaffected.
; NOTE(review): the checks below still expect the returning form
; (ds_max_rtn_f32 / DS_MAX_RTN_F32) even though the result is dead; they
; record current output, so regenerate them if a no-return selection is added.
254define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
255; GFX8-LABEL: ds_fmax_f32_vv_nortn:
256; GFX8:       ; %bb.0:
257; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
258; GFX8-NEXT:    s_mov_b32 m0, -1
259; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
260; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
261; GFX8-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX9-LABEL: ds_fmax_f32_vv_nortn:
264; GFX9:       ; %bb.0:
265; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
267; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
268; GFX9-NEXT:    s_setpc_b64 s[30:31]
269  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
270  ; GFX8-MIR: bb.1 (%ir-block.0):
271  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
272  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
273  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
274  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
275  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
276  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
277  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
278  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
279  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
280  ; GFX9-MIR: bb.1 (%ir-block.0):
281  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
282  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
283  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
284  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
285  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
286  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
287  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
288  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
289  ret void
290}
291
; VGPR pointer and value, pointer advanced by a GEP of 128 floats, intrinsic
; result unused. The discarded value is named %unused, matching the
; amdgpu_ps *_nortn tests in this file; the name does not appear in any
; check line, so the assertions are unaffected. The checks expect the
; displacement as the immediate offset (512) and, as currently generated,
; the returning form of the DS max instruction.
292define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
293; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn:
294; GFX8:       ; %bb.0:
295; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX8-NEXT:    s_mov_b32 m0, -1
297; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
298; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
299; GFX8-NEXT:    s_setpc_b64 s[30:31]
300;
301; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn:
302; GFX9:       ; %bb.0:
303; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
305; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
306; GFX9-NEXT:    s_setpc_b64 s[30:31]
307  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
308  ; GFX8-MIR: bb.1 (%ir-block.0):
309  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
310  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
311  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
312  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
313  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
314  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
315  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
316  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
317  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
318  ; GFX9-MIR: bb.1 (%ir-block.0):
319  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
320  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
321  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
322  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
323  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
324  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
325  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
326  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
327  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
328  ret void
329}
330
; VGPR pointer and value, with the intrinsic's final i1 operand set to true.
; The ISA checks match the non-volatile VGPR case; the MIR checks expect the
; memory operand to be marked volatile ("volatile load store (s32)").
331define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
332; GFX8-LABEL: ds_fmax_f32_vv_volatile:
333; GFX8:       ; %bb.0:
334; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX8-NEXT:    s_mov_b32 m0, -1
336; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
337; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
338; GFX8-NEXT:    s_setpc_b64 s[30:31]
339;
340; GFX9-LABEL: ds_fmax_f32_vv_volatile:
341; GFX9:       ; %bb.0:
342; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
344; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
345; GFX9-NEXT:    s_setpc_b64 s[30:31]
346  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
347  ; GFX8-MIR: bb.1 (%ir-block.0):
348  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
349  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
350  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
351  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
352  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
353  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
354  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
355  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
356  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
357  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
358  ; GFX9-MIR: bb.1 (%ir-block.0):
359  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
360  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
361  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
362  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
363  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
364  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
365  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
366  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
367  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
368  ret float %ret
369}
370
; Intrinsic under test: takes an addrspace(3) pointer, a float operand, and
; three immediate operands. Every test in this file passes 0 for both i32
; immediates; setting the trailing i1 to true is what produces a volatile
; memory operand in ds_fmax_f32_vv_volatile.
; NOTE(review): presumably the two i32 immediates are atomic ordering and
; scope - confirm against the intrinsic definition.
371declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
372
373attributes #0 = { argmemonly nounwind willreturn }
374