; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: Test with SI when argument lowering not broken for f16

; Tests GlobalISel instruction selection of llvm.amdgcn.raw.buffer.load.*;
; the check lines below are the MIR as it stands after -stop-after=instruction-select.

; Natural mapping
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Copies for VGPR arguments
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Waterfall for rsrc
; (bb.2 is a readfirstlane loop: it re-executes via S_CBRANCH_EXECNZ until the
; resource descriptor is uniform across the remaining active lanes)
define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Waterfall for rsrc and soffset
; (same loop as above, plus one extra V_READFIRSTLANE_B32 / V_CMP_EQ_U32 pair
; for the divergent soffset operand)
define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
  ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Natural mapping + glc
; The final i32 immarg of the intrinsic is the cachepolicy; its value is
; forwarded as the 5th operand of the selected BUFFER_LOAD (glc=1, slc=2,
; dlc=4, and their bitwise combinations below).
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
  ret float %val
}

; Natural mapping + slc
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret float %val
}

; Natural mapping + dlc
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
  ret float %val
}

; Natural mapping + slc + dlc
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
  ret float %val
}

; Natural mapping + glc + dlc
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
  ret float %val
}

; Natural mapping + glc + slc + dlc
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_slc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
  ret float %val
}

; Natural mapping
define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4)
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
  ; CHECK: $vgpr0 = COPY [[COPY6]]
  ; CHECK: $vgpr1 = COPY [[COPY7]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x float> %val
}

define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4)
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
  ; CHECK: $vgpr0 = COPY [[COPY6]]
  ; CHECK: $vgpr1 = COPY [[COPY7]]
  ; CHECK: $vgpr2 = COPY [[COPY8]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <3 x float> %val
}

define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4)
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
  ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
  ; CHECK: $vgpr0 = COPY [[COPY6]]
  ; CHECK: $vgpr1 = COPY [[COPY7]]
  ; CHECK: $vgpr2 = COPY [[COPY8]]
  ; CHECK: $vgpr3 = COPY [[COPY9]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x float> %val
}

define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret half %val
}

define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x half> %val
}

; FIXME: Crashes
; define amdgpu_ps <3 x half> @raw_buffer_load_v3f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
;   %val = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
;   ret <3 x half> %val
; }

define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4)
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
  ; CHECK: $vgpr0 = COPY [[COPY6]]
  ; CHECK: $vgpr1 = COPY [[COPY7]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x half> %val
}

define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %zext = zext i8 %val to i32
  %cast = bitcast i32 %zext to float
  ret float %cast
}

define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4)
  ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec
  ; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %zext = sext i8 %val to i32
  %cast = bitcast i32 %zext to float
  ret float %cast
}

; Waterfall for rsrc
define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret half %val
}

; Waterfall for rsrc
define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %zext = zext i8 %val to i32
  %cast = bitcast i32 %zext to float
  ret float %cast
}

; A constant-zero voffset selects the OFFSET (no vgpr offset operand) form.
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
  ret float %val
}

define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095(<4 x i32> inreg %rsrc, i32 inreg %soffset)
{ 523 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095 524 ; CHECK: bb.1 (%ir-block.0): 525 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 526 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 527 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 528 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 529 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 530 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 531 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 532 ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 533 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 534 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 535 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) 536 ret float %val 537} 538 539define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 540 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096 541 ; CHECK: bb.1 (%ir-block.0): 542 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 543 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 544 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 545 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 546 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 547 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 548 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 549 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 550 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 551 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 552 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 553 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 554 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) 555 ret float %val 556} 557 558define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16(<4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { 559 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16 560 ; CHECK: bb.1 (%ir-block.0): 561 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 562 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 563 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 564 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 565 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 566 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 567 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 568 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 569 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 570 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 571 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 572 %voffset = add i32 %voffset.base, 16 573 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 574 ret float %val 575} 576 577define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(<4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { 578 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 579 ; CHECK: bb.1 
(%ir-block.0): 580 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 581 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 582 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 583 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 584 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 585 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 586 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 587 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 588 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 589 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 590 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 591 %voffset = add i32 %voffset.base, 4095 592 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 593 ret float %val 594} 595 596define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096(<4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { 597 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096 598 ; CHECK: bb.1 (%ir-block.0): 599 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 600 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 601 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 602 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 603 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 604 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 605 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 606 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 607 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 608 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = 
COPY [[S_MOV_B32_]] 609 ; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 610 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 611 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 612 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 613 %voffset = add i32 %voffset.base, 4096 614 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 615 ret float %val 616} 617 618define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095(<4 x i32> inreg %rsrc, i32 %voffset) { 619 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095 620 ; CHECK: bb.1 (%ir-block.0): 621 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 622 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 623 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 624 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 625 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 626 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 627 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 628 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 629 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 630 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 631 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 632 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) 633 ret float %val 634} 635 636define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096(<4 x i32> inreg %rsrc, i32 %voffset) 
{ 637 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096 638 ; CHECK: bb.1 (%ir-block.0): 639 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 640 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 641 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 642 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 643 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 644 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 645 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 646 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 647 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 648 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 649 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 650 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) 651 ret float %val 652} 653 654define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset.base) { 655 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16 656 ; CHECK: bb.1 (%ir-block.0): 657 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 658 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 659 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 660 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 661 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 662 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 663 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 664 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 665 ; CHECK: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 666 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 667 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 668 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 669 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 670 %soffset = add i32 %soffset.base, 16 671 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 672 ret float %val 673} 674 675define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset.base) { 676 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095 677 ; CHECK: bb.1 (%ir-block.0): 678 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 679 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 680 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 681 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 682 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 683 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 684 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 685 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 686 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 687 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 688 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 689 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 690 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 691 %soffset = add i32 %soffset.base, 4095 
692 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 693 ret float %val 694} 695 696define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset.base) { 697 ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096 698 ; CHECK: bb.1 (%ir-block.0): 699 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 700 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 701 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 702 ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 703 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 704 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 705 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 706 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 707 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 708 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 709 ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) 710 ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 711 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 712 %soffset = add i32 %soffset.base, 4096 713 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 714 ret float %val 715} 716 717; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset.base) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
  ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %soffset = add i32 %soffset.base, 5000
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000(<4 x i32> %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK: %14:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %voffset = add i32 %voffset.base, 5000
  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg)
declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg)
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg)

declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg)
declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg)
declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg)

declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32 immarg)