; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s

; Make sure the memory operand information is preserved.
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-MIR %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s

; GlobalISel selection tests for the llvm.amdgcn.ds.fmax intrinsic. Each test is
; checked twice per target: once on the final assembly and once on the MIR
; produced right after instruction selection. The tonga checks expect m0 to be
; set to -1 before the DS instruction; the gfx900 checks do not.

; Pointer and value are passed inreg and arrive in SGPRs; the checks expect
; both to be copied to VGPRs before the DS instruction.
define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ds_fmax_f32_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  ; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  ; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Same as above, but the address is a constant GEP of 128 floats; the checks
; expect it to be folded into the instruction's immediate offset (512 bytes).
define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss_offset:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ds_fmax_f32_ss_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  ; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  ; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; The intrinsic result is unused. The checks still expect the returning
; (ds_max_rtn_f32) form of the instruction.
define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: ds_fmax_f32_ss_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_endpgm
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX8-MIR:   S_ENDPGM 0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX9-MIR:   S_ENDPGM 0
  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Unused result combined with the constant 512-byte offset.
define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT:    s_endpgm
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX8-MIR:   S_ENDPGM 0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX9-MIR:   S_ENDPGM 0
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Default calling convention: pointer and value arrive in VGPRs, so no
; SGPR-to-VGPR copies are expected.
define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; VGPR operands with the constant 512-byte offset folded into the instruction.
define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_offset:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; VGPR operands, result unused. The checks still expect the returning form.
define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}

; VGPR operands, result unused, with the constant 512-byte offset.
define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}

; The final i1 argument of the intrinsic is true here; the MIR checks expect
; the memory operand to carry the volatile flag.
define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmax_f32_vv_volatile:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmax_f32_vv_volatile:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
  ; GFX8-MIR: bb.1 (%ir-block.0):
  ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
  ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
  ; GFX9-MIR: bb.1 (%ir-block.0):
  ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
  ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
  ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
  %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
  ret float %ret
}

declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0

attributes #0 = { argmemonly nounwind willreturn }