; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: Test with SI when argument lowering not broken for f16

; Natural mapping
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Copies for VGPR arguments
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Waterfall for rsrc
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Waterfall for soffset
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Waterfall for rsrc and soffset
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
  ; CHECK:   BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "TargetCustom7", addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %val.trunc = trunc i32 %val to i8
  call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %val.trunc = trunc i32 %val to i16
  call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; CHECK:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 16
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 16
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   %13:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; CHECK:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 5000, align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 5000
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset(<4 x i32> %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; CHECK:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 5000, align 1, addrspace 4)
  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.4(0x80000000)
  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0)
  ret void
}

declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg)

declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)

declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)