; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GFX908
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GFX90A

; GlobalISel instruction-selection checks for the struct buffer fadd atomic
; intrinsics (f32 and <2 x half>) when the returned value is never used.
; Each function is compiled for gfx908 and gfx90a and the MIR after the
; instruction-select pass is compared against the assertions inside it.
;
; As recorded in the assertions:
;  * gfx908 expects the BUFFER_ATOMIC_* opcodes without a result register.
;  * gfx90a expects the *_RTN opcodes, which define a vgpr_32 that nothing reads,
;    and a final immediate operand that is one greater than on gfx908
;    (1 instead of 0, 3 instead of 2).
;    NOTE(review): presumably the extra bit is the "return pre-op value" cache
;    policy bit that accompanies the returning form - confirm against the target
;    documentation before relying on it.
;  * "inreg" arguments arrive in SGPRs, all other arguments arrive in VGPRs.

; Natural mapping
; %val, %vindex and %voffset are VGPR arguments; %rsrc and %soffset are SGPR
; (inreg) arguments. %vindex and %voffset are packed into a 64-bit REG_SEQUENCE
; and the BOTHEN form is expected, with an immediate offset of 0.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Natural mapping, %voffset + 4095
; The constant added to %voffset is expected to land in the instruction's
; immediate offset operand (4095), so the BOTHEN address pair is still built
; from the unmodified %vindex / %voffset argument registers.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; Natural mapping, constant voffset of 4095
; The whole voffset is expected in the immediate offset operand (4095); only
; %vindex is passed in a register, so the IDXEN form is expected.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0)
  ret void
}

; Natural mapping, no voffset
; voffset is the constant 0, so only %vindex is passed in a register and the
; IDXEN form with an immediate offset of 0 is expected.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; All register operands need legalization
; Register kinds are the reverse of the natural mapping: %val, %vindex and
; %voffset arrive in SGPRs and are copied into VGPRs, while %rsrc and %soffset
; arrive in VGPRs. The expected MIR saves $exec, then loops in bb.2:
; V_READFIRSTLANE_B32 reads each dword of %rsrc and %soffset into SGPRs,
; V_CMP_EQ compares those values against the VGPR originals, the atomic is
; issued, and S_AND_SAVEEXEC / S_XOR / S_CBRANCH_EXECNZ repeat the block while
; $exec is non-zero. bb.3 restores the saved $exec.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgpr_voffset__vgpr_soffset(float inreg %val, <4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: successors: %bb.2(0x80000000)
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX908: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX908: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX908: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX908: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX908: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX908: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX908: bb.2:
  ; GFX908: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX908: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
  ; GFX908: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
  ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX908: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec
  ; GFX908: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
  ; GFX908: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
  ; GFX908: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX908: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec
  ; GFX908: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX908: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
  ; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX908: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
  ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX908: bb.3:
  ; GFX908: successors: %bb.4(0x80000000)
  ; GFX908: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX908: bb.4:
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: successors: %bb.2(0x80000000)
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX90A: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX90A: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX90A: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX90A: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX90A: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX90A: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX90A: bb.2:
  ; GFX90A: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX90A: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
  ; GFX90A: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
  ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY11]], implicit $exec
  ; GFX90A: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
  ; GFX90A: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
  ; GFX90A: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX90A: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY12]], implicit $exec
  ; GFX90A: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX90A: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
  ; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX90A: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX90A: bb.3:
  ; GFX90A: successors: %bb.4(0x80000000)
  ; GFX90A: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX90A: bb.4:
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; All register operands need legalization, no voffset
; Same reversed register kinds as the previous test, but voffset is the
; constant 0: only %vindex is copied into a VGPR for addressing and the IDXEN
; form is expected inside the same V_READFIRSTLANE_B32 / S_CBRANCH_EXECNZ loop.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_voffset__vgpr_soffset(float inreg %val, <4 x i32> %rsrc, i32 inreg %vindex, i32 %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_voffset__vgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: successors: %bb.2(0x80000000)
  ; GFX908: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX908: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX908: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX908: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX908: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX908: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX908: bb.2:
  ; GFX908: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX908: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; GFX908: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX908: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec
  ; GFX908: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
  ; GFX908: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
  ; GFX908: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX908: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
  ; GFX908: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX908: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX908: bb.3:
  ; GFX908: successors: %bb.4(0x80000000)
  ; GFX908: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX908: bb.4:
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_voffset__vgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: successors: %bb.2(0x80000000)
  ; GFX90A: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX90A: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX90A: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX90A: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX90A: [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX90A: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX90A: bb.2:
  ; GFX90A: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX90A: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; GFX90A: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec
  ; GFX90A: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
  ; GFX90A: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
  ; GFX90A: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX90A: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
  ; GFX90A: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX90A: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX90A: bb.3:
  ; GFX90A: successors: %bb.4(0x80000000)
  ; GFX90A: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX90A: bb.4:
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; Natural mapping + slc
; The intrinsic's last (immarg) operand is 2 here instead of 0. The expected
; final immediate on the selected instruction is 2 on gfx908 and 3 on gfx90a.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; Natural mapping + slc, no voffset
; Same immarg value of 2 as the previous test, with a constant 0 voffset, so
; the IDXEN form is expected.
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2)
  ret void
}

; <2 x half> variant, natural mapping
; The packed-half intrinsic is expected to select BUFFER_ATOMIC_PK_ADD_F16
; (BOTHEN form) with a (<2 x s16>) memory operand; %val still occupies a single
; vgpr_32.
define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; <2 x half> variant, no voffset
; Constant 0 voffset: the IDXEN form of BUFFER_ATOMIC_PK_ADD_F16 is expected.
define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX908: S_ENDPGM 0
  ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX90A: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; Intrinsic declarations. As used by the calls above, the operands are
; (value, rsrc, vindex, voffset, soffset, <immarg>); the final i32 must be an
; immediate (immarg).
declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0

attributes #0 = { nounwind }