; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GFX908
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GFX90A

; Natural mapping
define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

; Natural mapping, no voffset
define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
  ret void
}

; All operands need regbank legalization
define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgpr_voffset__vgpr_soffset(float inreg %val, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   successors: %bb.2(0x80000000)
  ; GFX908:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX908:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX908:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX908:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX908:   [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX908:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX908: bb.2:
  ; GFX908:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX908:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; GFX908:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; GFX908:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX908:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec
  ; GFX908:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
  ; GFX908:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
  ; GFX908:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX908:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
  ; GFX908:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX908:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX908:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX908:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX908:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX908: bb.3:
  ; GFX908:   successors: %bb.4(0x80000000)
  ; GFX908:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX908: bb.4:
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   successors: %bb.2(0x80000000)
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX90A:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX90A:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX90A:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX90A:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX90A:   [[COPY10:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX90A:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX90A: bb.2:
  ; GFX90A:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX90A:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; GFX90A:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; GFX90A:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec
  ; GFX90A:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
  ; GFX90A:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
  ; GFX90A:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX90A:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
  ; GFX90A:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX90A:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX90A:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX90A:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX90A:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX90A:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX90A: bb.3:
  ; GFX90A:   successors: %bb.4(0x80000000)
  ; GFX90A:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX90A: bb.4:
  ; GFX90A:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; All operands need regbank legalization, no voffset
define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_voffset__vgpr_soffset(float inreg %val, <4 x i32> %rsrc, i32 %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_voffset__vgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   successors: %bb.2(0x80000000)
  ; GFX908:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX908:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX908:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX908:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX908:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX908: bb.2:
  ; GFX908:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX908:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX908:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX908:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX908:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
  ; GFX908:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; GFX908:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; GFX908:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX908:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; GFX908:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX908:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX908:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX908:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
  ; GFX908:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX908: bb.3:
  ; GFX908:   successors: %bb.4(0x80000000)
  ; GFX908:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX908: bb.4:
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_voffset__vgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   successors: %bb.2(0x80000000)
  ; GFX90A:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX90A:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX90A:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX90A:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX90A:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX90A:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX90A: bb.2:
  ; GFX90A:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GFX90A:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX90A:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX90A:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX90A:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
  ; GFX90A:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; GFX90A:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; GFX90A:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX90A:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; GFX90A:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX90A:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX90A:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX90A:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
  ; GFX90A:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX90A:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX90A:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX90A: bb.3:
  ; GFX90A:   successors: %bb.4(0x80000000)
  ; GFX90A:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX90A: bb.4:
  ; GFX90A:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095(float %val, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %voffset = add i32 %voffset.base, 4095
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Natural mapping + slc
define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX908:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX908:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX908:   S_ENDPGM 0
  ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset
  ; GFX90A: bb.1 (%ir-block.0):
  ; GFX90A:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX90A:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX90A:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX90A:   [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4)
  ; GFX90A:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
  ret void
}

declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0

attributes #0 = { nounwind }