; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s

; FIXME: Merge with regbankselect, which mostly overlaps when all types supported.

; Natural mapping
define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret i32 %val
}

define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_i32_glc
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_glc
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_glc
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
  ret i32 %val
}

define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v2i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX7-LABEL: name: s_buffer_load_v2i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX8-LABEL: name: s_buffer_load_v2i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <2 x i32> %val
}

define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v3i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX6: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
  ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
  ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
  ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
  ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  ; GFX7-LABEL: name: s_buffer_load_v3i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
  ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
  ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
  ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
  ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  ; GFX8-LABEL: name: s_buffer_load_v3i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
  ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
  ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
  ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
  ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <3 x i32> %val
}

define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v8i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x i32> %val
}

define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v16i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
  ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
  ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
  ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
  ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
  ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
  ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
  ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
  ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
  ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
  ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
  ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
  ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
  ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
  ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
  ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
  ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
  ; GFX6: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
  ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
  ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
  ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX6: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
  ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX6: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
  ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX6: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
  ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX6: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
  ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX6: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
  ; GFX6: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
  ; GFX6: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
  ; GFX6: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX6: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
  ; GFX6: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
  ; GFX6: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
  ; GFX6: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
  ; GFX6: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
  ; GFX6: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
  ; GFX6: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
  ; GFX6: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
  ; GFX6: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX6: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
  ; GFX6: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
  ; GFX6: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
  ; GFX6: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
  ; GFX6: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
  ; GFX6: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
  ; GFX6: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
  ; GFX6: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
  ; GFX6: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
  ; GFX6: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
  ; GFX6: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
  ; GFX7-LABEL: name: s_buffer_load_v16i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
  ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
  ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
  ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
  ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
  ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
  ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
  ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
  ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
  ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
  ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
  ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
  ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
  ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
  ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
  ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
  ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
  ; GFX7: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
  ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
  ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
  ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX7: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
  ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX7: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
  ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX7: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
  ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX7: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
  ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX7: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
  ; GFX7: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
  ; GFX7: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
  ; GFX7: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
  ; GFX7: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
  ; GFX7: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
  ; GFX7: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
  ; GFX7: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
  ; GFX7: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
  ; GFX7: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
  ; GFX7: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
  ; GFX7: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
  ; GFX7: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
  ; GFX7: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
  ; GFX7: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
  ; GFX7: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
  ; GFX7: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
  ; GFX7: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
  ; GFX7: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
  ; GFX7: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
  ; GFX7: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
  ; GFX7: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
  ; GFX7:
[[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec 525 ; GFX7: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] 526 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 527 ; GFX8-LABEL: name: s_buffer_load_v16i32 528 ; GFX8: bb.1 (%ir-block.0): 529 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 530 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 531 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 532 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 533 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 534 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 535 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 536 ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4) 537 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 538 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 539 ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 540 ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 541 ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 542 ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 543 ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 544 ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 545 ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 546 ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 547 ; GFX8: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 548 ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 549 ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 550 ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 551 ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 552 ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 553 ; GFX8: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 554 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec 555 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 556 ; GFX8: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 557 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec 558 ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 559 ; GFX8: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] 560 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec 561 ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] 562 ; GFX8: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 563 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec 564 ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] 565 ; GFX8: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 566 ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec 567 ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] 568 ; GFX8: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] 569 ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec 570 ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] 571 ; GFX8: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] 572 ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec 573 ; GFX8: $sgpr6 = COPY 
[[V_READFIRSTLANE_B32_6]] 574 ; GFX8: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] 575 ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec 576 ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] 577 ; GFX8: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] 578 ; GFX8: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec 579 ; GFX8: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] 580 ; GFX8: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] 581 ; GFX8: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec 582 ; GFX8: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] 583 ; GFX8: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] 584 ; GFX8: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec 585 ; GFX8: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] 586 ; GFX8: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] 587 ; GFX8: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec 588 ; GFX8: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] 589 ; GFX8: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] 590 ; GFX8: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec 591 ; GFX8: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] 592 ; GFX8: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] 593 ; GFX8: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec 594 ; GFX8: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] 595 ; GFX8: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] 596 ; GFX8: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec 597 ; GFX8: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] 598 ; GFX8: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] 599 ; GFX8: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec 600 ; GFX8: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] 601 ; GFX8: SI_RETURN_TO_EPILOG implicit 
$sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
  %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <16 x i32> %val
}

; Immediate offset 1: 1 is not a multiple of 4, so GFX6/GFX7 (whose buffer-load
; immediate is scaled in dwords, see the 256 -> 64 case below) cannot encode it
; and materialize the byte offset in an SGPR instead; GFX8 encodes byte offsets
; directly in S_BUFFER_LOAD_DWORD_IMM.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
  ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
  ret i32 %val
}

; Offset 4 with glc (cachepolicy bit 0 set, third intrinsic operand = 1):
; GFX6/GFX7 encode the immediate in dword units (4 bytes -> 1) with the glc
; operand 1; GFX8 encodes the byte offset 4 directly.
define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
  ret i32 %val
}

; Offset 255: not a multiple of 4, so GFX6/GFX7 fall back to the SGPR-offset
; form; GFX8 takes 255 as a byte immediate.
define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_255
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_255
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ;
GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
  ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_255
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
  ret i32 %val
}

; Offset 256: a multiple of 4, so GFX6/GFX7 use the dword-scaled immediate
; (256 / 4 = 64); GFX8 encodes the byte offset 256 directly.
define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_256
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_256
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_256
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
  ret i32 %val
}

; Offset 1020: 1020 / 4 = 255 still fits the GFX6/GFX7 dword-scaled immediate
; (contrast with 1024 below, which no longer fits on GFX6); GFX8 encodes 1020
; directly in bytes.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
  ret i32 %val
}

; Offset 1023: not a multiple of 4 -> SGPR-offset form on GFX6/GFX7; byte
; immediate on GFX8.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
  ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
  ret i32 %val
}

; Offset 1024: 1024 / 4 = 256 no longer fits GFX6's dword-scaled immediate, so
; GFX6 materializes the offset in an SGPR; GFX7 uses the CI-only
; S_BUFFER_LOAD_DWORD_IMM_ci encoding (dword-scaled, 256); GFX8 encodes 1024
; directly in bytes.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
  ret i32 %val
}

; Offset 1025: not a multiple of 4 -> SGPR-offset form on GFX6/GFX7; byte
; immediate on GFX8.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
  ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
  ret i32 %val
}

; Offset -1: wraps to 4294967295, materialized in an SGPR on GFX6 (checks for
; the remaining targets continue past this chunk). NOTE(review): parameter is
; named %desc here, unlike %rsrc in the preceding tests.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 
= COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 979 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 980 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 981 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 982 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 983 ; GFX7: bb.1 (%ir-block.0): 984 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 985 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 986 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 987 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 988 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 989 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 990 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 991 ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 992 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 993 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 994 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 995 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 996 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 997 ; GFX8: bb.1 (%ir-block.0): 998 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 999 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1000 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1001 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1002 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1003 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1004 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 1005 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
  ret i32 %load
}

; Offset of -4: GFX7 (CI) folds the constant into S_BUFFER_LOAD_DWORD_IMM_ci as
; an unsigned *dword* offset (4294967292 bytes / 4 = 1073741823), while GFX6 and
; GFX8 cannot encode it as an immediate and instead materialize the 32-bit
; constant with S_MOV_B32 and use the SGPR-offset form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
  ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
  ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
  ret i32 %load
}

; Offset of -8: same pattern — presumably GFX7 again folds it as dword offset
; 1073741822 while GFX6/GFX8 go through S_MOV_B32 (the GFX7/GFX8 check blocks
; continue past this span).
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1067 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1068 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1069 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288 1070 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1071 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1072 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1073 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1074 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1075 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 1076 ; GFX7: bb.1 (%ir-block.0): 1077 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1078 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1079 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1080 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1081 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1082 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1083 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0, 0 :: (dereferenceable invariant load 4) 1084 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1085 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1086 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1087 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1088 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 1089 ; GFX8: bb.1 (%ir-block.0): 1090 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1091 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1092 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1093 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1094 ; 
GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1095 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1096 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288 1097 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1098 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1099 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1100 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1101 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1102 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) 1103 ret i32 %load 1104} 1105 1106define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { 1107 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31 1108 ; GFX6: bb.1 (%ir-block.0): 1109 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1110 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1111 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1112 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1113 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1114 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1115 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 1116 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1117 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1118 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1119 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1120 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1121 ; GFX7-LABEL: name: 
s_buffer_load_i32_offset_bit31 1122 ; GFX7: bb.1 (%ir-block.0): 1123 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1124 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1125 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1126 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1127 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1128 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1129 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0, 0 :: (dereferenceable invariant load 4) 1130 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1131 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1132 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1133 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1134 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 1135 ; GFX8: bb.1 (%ir-block.0): 1136 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1137 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1138 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1139 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1140 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1141 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1142 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 1143 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1144 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1145 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1146 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1147 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1148 %load = call i32 
@llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) 1149 ret i32 %load 1150} 1151 1152define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) { 1153 ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30 1154 ; GFX6: bb.1 (%ir-block.0): 1155 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1156 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1157 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1158 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1159 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1160 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1161 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 1162 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4) 1163 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1164 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1165 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1166 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1167 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 1168 ; GFX7: bb.1 (%ir-block.0): 1169 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1170 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1171 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1172 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1173 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1174 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1175 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1, 0 :: (dereferenceable invariant load 4) 1176 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1177 ; GFX7: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1178 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1179 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1180 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 1181 ; GFX8: bb.1 (%ir-block.0): 1182 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1183 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1184 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1185 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1186 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1187 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1188 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 1189 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4) 1190 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1191 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1192 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1193 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1194 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) 1195 ret i32 %load 1196} 1197 1198define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { 1199 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29 1200 ; GFX6: bb.1 (%ir-block.0): 1201 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1202 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1203 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1204 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1205 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1206 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1207 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
536870912 1208 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1209 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1210 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1211 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1212 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1213 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 1214 ; GFX7: bb.1 (%ir-block.0): 1215 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1216 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1217 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1218 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1219 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1220 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1221 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0, 0 :: (dereferenceable invariant load 4) 1222 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1223 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1224 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1225 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1226 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 1227 ; GFX8: bb.1 (%ir-block.0): 1228 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1229 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1230 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1231 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1232 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1233 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1234 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
536870912 1235 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1236 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1237 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1238 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1239 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1240 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) 1241 ret i32 %load 1242} 1243 1244define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { 1245 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21 1246 ; GFX6: bb.1 (%ir-block.0): 1247 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1248 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1249 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1250 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1251 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1252 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1253 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 1254 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1255 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1256 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1257 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1258 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1259 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 1260 ; GFX7: bb.1 (%ir-block.0): 1261 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1262 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1263 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1264 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1265 ; 
GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1266 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1267 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4) 1268 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1269 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1270 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1271 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1272 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 1273 ; GFX8: bb.1 (%ir-block.0): 1274 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1275 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1276 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1277 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1278 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1279 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1280 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 1281 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1282 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1283 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1284 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1285 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1286 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) 1287 ret i32 %load 1288} 1289 1290define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { 1291 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20 1292 ; GFX6: bb.1 (%ir-block.0): 1293 ; GFX6: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5 1294 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1295 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1296 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1297 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1298 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1299 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 1300 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1301 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1302 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1303 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1304 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1305 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 1306 ; GFX7: bb.1 (%ir-block.0): 1307 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1308 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1309 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1310 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1311 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1312 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1313 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0, 0 :: (dereferenceable invariant load 4) 1314 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1315 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1316 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1317 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1318 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 1319 ; GFX8: bb.1 (%ir-block.0): 1320 ; GFX8: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5 1321 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1322 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1323 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1324 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1325 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1326 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 1327 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1328 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1329 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1330 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1331 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1332 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) 1333 ret i32 %load 1334} 1335 1336define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) { 1337 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20 1338 ; GFX6: bb.1 (%ir-block.0): 1339 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1340 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1341 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1342 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1343 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1344 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1345 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720 1346 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1347 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1348 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 
= V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1349 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1350 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1351 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 1352 ; GFX7: bb.1 (%ir-block.0): 1353 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1354 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1355 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1356 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1357 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1358 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1359 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0, 0 :: (dereferenceable invariant load 4) 1360 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1361 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1362 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1363 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1364 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 1365 ; GFX8: bb.1 (%ir-block.0): 1366 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1367 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1368 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1369 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1370 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1371 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1372 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720 1373 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1374 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1375 ; GFX8: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1376 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1377 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1378 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) 1379 ret i32 %load 1380} 1381 1382define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { 1383 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19 1384 ; GFX6: bb.1 (%ir-block.0): 1385 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1386 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1387 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1388 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1389 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1390 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1391 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 1392 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1393 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1394 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1395 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1396 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1397 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 1398 ; GFX7: bb.1 (%ir-block.0): 1399 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1400 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1401 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1402 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1403 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1404 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1405 ; GFX7: 
[[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0, 0 :: (dereferenceable invariant load 4) 1406 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1407 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1408 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1409 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1410 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 1411 ; GFX8: bb.1 (%ir-block.0): 1412 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1413 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1414 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1415 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1416 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1417 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1418 ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4) 1419 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] 1420 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1421 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1422 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1423 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) 1424 ret i32 %load 1425} 1426 1427define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) { 1428 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19 1429 ; GFX6: bb.1 (%ir-block.0): 1430 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1431 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1432 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1433 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1434 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1435 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1436 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 1437 ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1438 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1439 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1440 ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1441 ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 1442 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19 1443 ; GFX7: bb.1 (%ir-block.0): 1444 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1445 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1446 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1447 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1448 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1449 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1450 ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0, 0 :: (dereferenceable invariant load 4) 1451 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] 1452 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1453 ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1454 ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 1455 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19 1456 ; GFX8: bb.1 (%ir-block.0): 1457 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 1458 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1459 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1460 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1461 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1462 ; GFX8: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1463 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 1464 ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4) 1465 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] 1466 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 1467 ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 1468 ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 1469 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) 1470 ret i32 %load 1471} 1472 1473; Check cases that need to be converted to MUBUF due to the offset being a VGPR. 1474define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1475 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset 1476 ; GFX6: bb.1 (%ir-block.0): 1477 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1478 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1479 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1480 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1481 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1482 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1483 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1484 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1485 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1486 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1487 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 1488 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset 1489 ; GFX7: bb.1 (%ir-block.0): 1490 ; 
GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1491 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1492 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1493 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1494 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1495 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1496 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1497 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1498 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1499 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1500 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 1501 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset 1502 ; GFX8: bb.1 (%ir-block.0): 1503 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1504 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1505 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1506 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1507 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1508 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1509 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1510 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1511 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1512 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1513 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 1514 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1515 ret float %val 1516} 1517 1518define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) 
{ 1519 ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset 1520 ; GFX6: bb.1 (%ir-block.0): 1521 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1522 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1523 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1524 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1525 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1526 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1527 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1528 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1529 ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) 1530 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1531 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1532 ; GFX6: $vgpr0 = COPY [[COPY5]] 1533 ; GFX6: $vgpr1 = COPY [[COPY6]] 1534 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1535 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset 1536 ; GFX7: bb.1 (%ir-block.0): 1537 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1538 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1539 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1540 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1541 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1542 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1543 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1544 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1545 ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) 1546 
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1547 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1548 ; GFX7: $vgpr0 = COPY [[COPY5]] 1549 ; GFX7: $vgpr1 = COPY [[COPY6]] 1550 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1551 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset 1552 ; GFX8: bb.1 (%ir-block.0): 1553 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1554 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1555 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1556 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1557 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1558 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1559 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1560 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1561 ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) 1562 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1563 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1564 ; GFX8: $vgpr0 = COPY [[COPY5]] 1565 ; GFX8: $vgpr1 = COPY [[COPY6]] 1566 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1567 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1568 ret <2 x float> %val 1569} 1570 1571define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1572 ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset 1573 ; GFX6: bb.1 (%ir-block.0): 1574 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1575 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1576 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1577 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1578 
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1579 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1580 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1581 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1582 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1583 ; GFX6: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2 1584 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0 1585 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1 1586 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2 1587 ; GFX6: $vgpr0 = COPY [[COPY6]] 1588 ; GFX6: $vgpr1 = COPY [[COPY7]] 1589 ; GFX6: $vgpr2 = COPY [[COPY8]] 1590 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 1591 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset 1592 ; GFX7: bb.1 (%ir-block.0): 1593 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1594 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1595 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1596 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1597 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1598 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1599 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1600 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1601 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1602 ; GFX7: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2 1603 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0 
1604 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1 1605 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2 1606 ; GFX7: $vgpr0 = COPY [[COPY6]] 1607 ; GFX7: $vgpr1 = COPY [[COPY7]] 1608 ; GFX7: $vgpr2 = COPY [[COPY8]] 1609 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 1610 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset 1611 ; GFX8: bb.1 (%ir-block.0): 1612 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1613 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1614 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1615 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1616 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1617 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1618 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1619 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1620 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1621 ; GFX8: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2 1622 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0 1623 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1 1624 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2 1625 ; GFX8: $vgpr0 = COPY [[COPY6]] 1626 ; GFX8: $vgpr1 = COPY [[COPY7]] 1627 ; GFX8: $vgpr2 = COPY [[COPY8]] 1628 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 1629 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1630 ret <3 x float> %val 1631} 1632 1633define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1634 ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset 1635 ; GFX6: bb.1 (%ir-block.0): 1636 ; GFX6: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $vgpr0 1637 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1638 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1639 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1640 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1641 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1642 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1643 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1644 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1645 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 1646 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 1647 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 1648 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 1649 ; GFX6: $vgpr0 = COPY [[COPY5]] 1650 ; GFX6: $vgpr1 = COPY [[COPY6]] 1651 ; GFX6: $vgpr2 = COPY [[COPY7]] 1652 ; GFX6: $vgpr3 = COPY [[COPY8]] 1653 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1654 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset 1655 ; GFX7: bb.1 (%ir-block.0): 1656 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1657 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1658 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1659 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1660 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1661 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1662 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1663 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1664 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1665 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 1666 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 1667 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 1668 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 1669 ; GFX7: $vgpr0 = COPY [[COPY5]] 1670 ; GFX7: $vgpr1 = COPY [[COPY6]] 1671 ; GFX7: $vgpr2 = COPY [[COPY7]] 1672 ; GFX7: $vgpr3 = COPY [[COPY8]] 1673 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1674 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset 1675 ; GFX8: bb.1 (%ir-block.0): 1676 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1677 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1678 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1679 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1680 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1681 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1682 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1683 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1684 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1685 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 1686 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 1687 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 1688 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 1689 ; GFX8: $vgpr0 = COPY [[COPY5]] 1690 ; GFX8: $vgpr1 = COPY [[COPY6]] 1691 ; GFX8: $vgpr2 = COPY 
[[COPY7]] 1692 ; GFX8: $vgpr3 = COPY [[COPY8]] 1693 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1694 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1695 ret <4 x float> %val 1696} 1697 1698define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1699 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset 1700 ; GFX6: bb.1 (%ir-block.0): 1701 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1702 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1703 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1704 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1705 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1706 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1707 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1708 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1709 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1710 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1711 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 1712 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1713 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1714 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1715 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1716 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1717 ; GFX6: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1718 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1719 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1720 ; GFX6: $vgpr0 = COPY [[COPY5]] 1721 ; GFX6: $vgpr1 = COPY [[COPY6]] 1722 ; GFX6: $vgpr2 = COPY [[COPY7]] 1723 ; GFX6: $vgpr3 = COPY [[COPY8]] 1724 ; GFX6: $vgpr4 = COPY [[COPY9]] 1725 ; GFX6: $vgpr5 = COPY [[COPY10]] 1726 ; GFX6: $vgpr6 = COPY [[COPY11]] 1727 ; GFX6: $vgpr7 = COPY [[COPY12]] 1728 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1729 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset 1730 ; GFX7: bb.1 (%ir-block.0): 1731 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1732 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1733 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1734 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1735 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1736 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1737 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1738 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1739 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1740 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1741 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 1742 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1743 ; GFX7: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1744 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1745 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1746 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1747 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1748 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1749 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1750 ; GFX7: $vgpr0 = COPY [[COPY5]] 1751 ; GFX7: $vgpr1 = COPY [[COPY6]] 1752 ; GFX7: $vgpr2 = COPY [[COPY7]] 1753 ; GFX7: $vgpr3 = COPY [[COPY8]] 1754 ; GFX7: $vgpr4 = COPY [[COPY9]] 1755 ; GFX7: $vgpr5 = COPY [[COPY10]] 1756 ; GFX7: $vgpr6 = COPY [[COPY11]] 1757 ; GFX7: $vgpr7 = COPY [[COPY12]] 1758 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1759 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset 1760 ; GFX8: bb.1 (%ir-block.0): 1761 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1762 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1763 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1764 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1765 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1766 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1767 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1768 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1769 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1770 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 
4) 1771 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 1772 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1773 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1774 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1775 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1776 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1777 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1778 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1779 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1780 ; GFX8: $vgpr0 = COPY [[COPY5]] 1781 ; GFX8: $vgpr1 = COPY [[COPY6]] 1782 ; GFX8: $vgpr2 = COPY [[COPY7]] 1783 ; GFX8: $vgpr3 = COPY [[COPY8]] 1784 ; GFX8: $vgpr4 = COPY [[COPY9]] 1785 ; GFX8: $vgpr5 = COPY [[COPY10]] 1786 ; GFX8: $vgpr6 = COPY [[COPY11]] 1787 ; GFX8: $vgpr7 = COPY [[COPY12]] 1788 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1789 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1790 ret <8 x float> %val 1791} 1792 1793define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1794 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset 1795 ; GFX6: bb.1 (%ir-block.0): 1796 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1797 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1798 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1799 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1800 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1801 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1802 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 1803 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1804 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1805 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1806 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 1807 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 1808 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 1809 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1810 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1811 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1812 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1813 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1814 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1815 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1816 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1817 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 1818 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 1819 ; GFX6: 
[[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 1820 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 1821 ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 1822 ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 1823 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 1824 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 1825 ; GFX6: $vgpr0 = COPY [[COPY5]] 1826 ; GFX6: $vgpr1 = COPY [[COPY6]] 1827 ; GFX6: $vgpr2 = COPY [[COPY7]] 1828 ; GFX6: $vgpr3 = COPY [[COPY8]] 1829 ; GFX6: $vgpr4 = COPY [[COPY9]] 1830 ; GFX6: $vgpr5 = COPY [[COPY10]] 1831 ; GFX6: $vgpr6 = COPY [[COPY11]] 1832 ; GFX6: $vgpr7 = COPY [[COPY12]] 1833 ; GFX6: $vgpr8 = COPY [[COPY13]] 1834 ; GFX6: $vgpr9 = COPY [[COPY14]] 1835 ; GFX6: $vgpr10 = COPY [[COPY15]] 1836 ; GFX6: $vgpr11 = COPY [[COPY16]] 1837 ; GFX6: $vgpr12 = COPY [[COPY17]] 1838 ; GFX6: $vgpr13 = COPY [[COPY18]] 1839 ; GFX6: $vgpr14 = COPY [[COPY19]] 1840 ; GFX6: $vgpr15 = COPY [[COPY20]] 1841 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1842 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset 1843 ; GFX7: bb.1 (%ir-block.0): 1844 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1845 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1846 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1847 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1848 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1849 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1850 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1851 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1852 ; GFX7: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1853 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1854 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 1855 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 1856 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 1857 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1858 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1859 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1860 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1861 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1862 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1863 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1864 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1865 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 1866 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 1867 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 1868 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub11 1869 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 1870 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 1871 ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 1872 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 1873 ; GFX7: $vgpr0 = COPY [[COPY5]] 1874 ; GFX7: $vgpr1 = COPY [[COPY6]] 1875 ; GFX7: $vgpr2 = COPY [[COPY7]] 1876 ; GFX7: $vgpr3 = COPY [[COPY8]] 1877 ; GFX7: $vgpr4 = COPY [[COPY9]] 1878 ; GFX7: $vgpr5 = COPY [[COPY10]] 1879 ; GFX7: $vgpr6 = COPY [[COPY11]] 1880 ; GFX7: $vgpr7 = COPY [[COPY12]] 1881 ; GFX7: $vgpr8 = COPY [[COPY13]] 1882 ; GFX7: $vgpr9 = COPY [[COPY14]] 1883 ; GFX7: $vgpr10 = COPY [[COPY15]] 1884 ; GFX7: $vgpr11 = COPY [[COPY16]] 1885 ; GFX7: $vgpr12 = COPY [[COPY17]] 1886 ; GFX7: $vgpr13 = COPY [[COPY18]] 1887 ; GFX7: $vgpr14 = COPY [[COPY19]] 1888 ; GFX7: $vgpr15 = COPY [[COPY20]] 1889 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1890 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset 1891 ; GFX8: bb.1 (%ir-block.0): 1892 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1893 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1894 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1895 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1896 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1897 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1898 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1899 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1900 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 
0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1901 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 1902 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 1903 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 1904 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 1905 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1906 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1907 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 1908 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 1909 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 1910 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 1911 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 1912 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 1913 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 1914 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 1915 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 1916 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 1917 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 1918 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub13 1919 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 1920 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 1921 ; GFX8: $vgpr0 = COPY [[COPY5]] 1922 ; GFX8: $vgpr1 = COPY [[COPY6]] 1923 ; GFX8: $vgpr2 = COPY [[COPY7]] 1924 ; GFX8: $vgpr3 = COPY [[COPY8]] 1925 ; GFX8: $vgpr4 = COPY [[COPY9]] 1926 ; GFX8: $vgpr5 = COPY [[COPY10]] 1927 ; GFX8: $vgpr6 = COPY [[COPY11]] 1928 ; GFX8: $vgpr7 = COPY [[COPY12]] 1929 ; GFX8: $vgpr8 = COPY [[COPY13]] 1930 ; GFX8: $vgpr9 = COPY [[COPY14]] 1931 ; GFX8: $vgpr10 = COPY [[COPY15]] 1932 ; GFX8: $vgpr11 = COPY [[COPY16]] 1933 ; GFX8: $vgpr12 = COPY [[COPY17]] 1934 ; GFX8: $vgpr13 = COPY [[COPY18]] 1935 ; GFX8: $vgpr14 = COPY [[COPY19]] 1936 ; GFX8: $vgpr15 = COPY [[COPY20]] 1937 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1938 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1939 ret <16 x float> %val 1940} 1941 1942define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1943 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1944 ; GFX6: bb.1 (%ir-block.0): 1945 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1946 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1947 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1948 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1949 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1950 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1951 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1952 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1953 ; GFX6: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1954 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1955 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 1956 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1957 ; GFX7: bb.1 (%ir-block.0): 1958 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1959 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1960 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1961 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1962 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1963 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1964 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1965 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1966 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1967 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1968 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 1969 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1970 ; GFX8: bb.1 (%ir-block.0): 1971 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1972 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1973 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1974 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1975 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1976 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1977 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1978 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1979 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1980 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1981 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 1982 %soffset = add i32 %soffset.base, 4092 1983 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1984 ret float %val 1985} 1986 1987define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1988 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 1989 ; GFX6: bb.1 (%ir-block.0): 1990 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1991 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 1992 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 1993 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 1994 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 1995 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1996 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 1997 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 1998 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 1999 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2000 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2001 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 2002 ; GFX7: bb.1 (%ir-block.0): 2003 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2004 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2005 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2006 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2007 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2008 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2009 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 
2010 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2011 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2012 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2013 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2014 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 2015 ; GFX8: bb.1 (%ir-block.0): 2016 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2017 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2018 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2019 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2020 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2021 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2022 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2023 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2024 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2025 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2026 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2027 %soffset = add i32 %soffset.base, 4095 2028 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2029 ret float %val 2030} 2031 2032define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2033 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 2034 ; GFX6: bb.1 (%ir-block.0): 2035 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2036 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2037 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2038 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2039 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2040 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 2041 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2042 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2043 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2044 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2045 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2046 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 2047 ; GFX7: bb.1 (%ir-block.0): 2048 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2049 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2050 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2051 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2052 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2053 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2054 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2055 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2056 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2057 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2058 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2059 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 2060 ; GFX8: bb.1 (%ir-block.0): 2061 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2062 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2063 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2064 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2065 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2066 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2067 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2068 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 2069 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2070 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2071 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2072 %soffset = add i32 %soffset.base, 4096 2073 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2074 ret float %val 2075} 2076 2077; Make sure the base offset is added to each split load. 2078define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2079 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 2080 ; GFX6: bb.1 (%ir-block.0): 2081 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2082 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2083 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2084 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2085 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2086 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2087 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2088 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2089 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2090 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2091 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2092 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2093 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2094 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2095 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2096 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2097 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2098 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2099 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2100 ; GFX6: $vgpr0 = COPY [[COPY5]] 2101 ; GFX6: $vgpr1 = COPY [[COPY6]] 2102 ; GFX6: $vgpr2 = COPY [[COPY7]] 2103 ; GFX6: $vgpr3 = COPY [[COPY8]] 2104 ; GFX6: $vgpr4 = COPY [[COPY9]] 2105 ; GFX6: $vgpr5 = COPY [[COPY10]] 2106 ; GFX6: $vgpr6 = COPY [[COPY11]] 2107 ; GFX6: $vgpr7 = COPY [[COPY12]] 2108 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2109 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 2110 ; GFX7: bb.1 (%ir-block.0): 2111 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2112 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2113 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2114 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2115 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2116 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2117 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2118 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2119 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2120 ; GFX7: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2121 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2122 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2123 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2124 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2125 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2126 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2127 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2128 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2129 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2130 ; GFX7: $vgpr0 = COPY [[COPY5]] 2131 ; GFX7: $vgpr1 = COPY [[COPY6]] 2132 ; GFX7: $vgpr2 = COPY [[COPY7]] 2133 ; GFX7: $vgpr3 = COPY [[COPY8]] 2134 ; GFX7: $vgpr4 = COPY [[COPY9]] 2135 ; GFX7: $vgpr5 = COPY [[COPY10]] 2136 ; GFX7: $vgpr6 = COPY [[COPY11]] 2137 ; GFX7: $vgpr7 = COPY [[COPY12]] 2138 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2139 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 2140 ; GFX8: bb.1 (%ir-block.0): 2141 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2142 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2143 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2144 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2145 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2146 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2147 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2148 
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2149 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2150 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2151 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2152 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2153 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2154 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2155 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2156 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2157 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2158 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2159 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2160 ; GFX8: $vgpr0 = COPY [[COPY5]] 2161 ; GFX8: $vgpr1 = COPY [[COPY6]] 2162 ; GFX8: $vgpr2 = COPY [[COPY7]] 2163 ; GFX8: $vgpr3 = COPY [[COPY8]] 2164 ; GFX8: $vgpr4 = COPY [[COPY9]] 2165 ; GFX8: $vgpr5 = COPY [[COPY10]] 2166 ; GFX8: $vgpr6 = COPY [[COPY11]] 2167 ; GFX8: $vgpr7 = COPY [[COPY12]] 2168 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2169 %soffset = add i32 %soffset.base, 4064 2170 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2171 ret <8 x float> %val 2172} 2173 2174; Make sure the maximum offset isn't exceeded when splitting this 2175 define amdgpu_ps <8 x float>
@s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2176 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 2177 ; GFX6: bb.1 (%ir-block.0): 2178 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2179 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2180 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2181 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2182 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2183 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2184 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2185 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 2186 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2187 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2188 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2189 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2190 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2191 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2192 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2193 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2194 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2195 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2196 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2197 ; GFX6: $vgpr0 = COPY [[COPY5]] 2198 ; GFX6: $vgpr1 = COPY [[COPY6]] 2199 ; GFX6: $vgpr2 = COPY 
[[COPY7]] 2200 ; GFX6: $vgpr3 = COPY [[COPY8]] 2201 ; GFX6: $vgpr4 = COPY [[COPY9]] 2202 ; GFX6: $vgpr5 = COPY [[COPY10]] 2203 ; GFX6: $vgpr6 = COPY [[COPY11]] 2204 ; GFX6: $vgpr7 = COPY [[COPY12]] 2205 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2206 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 2207 ; GFX7: bb.1 (%ir-block.0): 2208 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2209 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2210 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2211 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2212 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2213 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2214 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2215 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 2216 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2217 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2218 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2219 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2220 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2221 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2222 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2223 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2224 ; GFX7: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2225 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2226 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2227 ; GFX7: $vgpr0 = COPY [[COPY5]] 2228 ; GFX7: $vgpr1 = COPY [[COPY6]] 2229 ; GFX7: $vgpr2 = COPY [[COPY7]] 2230 ; GFX7: $vgpr3 = COPY [[COPY8]] 2231 ; GFX7: $vgpr4 = COPY [[COPY9]] 2232 ; GFX7: $vgpr5 = COPY [[COPY10]] 2233 ; GFX7: $vgpr6 = COPY [[COPY11]] 2234 ; GFX7: $vgpr7 = COPY [[COPY12]] 2235 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2236 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 2237 ; GFX8: bb.1 (%ir-block.0): 2238 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2239 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2240 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2241 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2242 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2243 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2244 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2245 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 2246 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2247 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2248 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 2249 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2250 ; 
GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2251 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2252 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2253 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2254 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2255 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2256 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2257 ; GFX8: $vgpr0 = COPY [[COPY5]] 2258 ; GFX8: $vgpr1 = COPY [[COPY6]] 2259 ; GFX8: $vgpr2 = COPY [[COPY7]] 2260 ; GFX8: $vgpr3 = COPY [[COPY8]] 2261 ; GFX8: $vgpr4 = COPY [[COPY9]] 2262 ; GFX8: $vgpr5 = COPY [[COPY10]] 2263 ; GFX8: $vgpr6 = COPY [[COPY11]] 2264 ; GFX8: $vgpr7 = COPY [[COPY12]] 2265 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2266 %soffset = add i32 %soffset.base, 4068 2267 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2268 ret <8 x float> %val 2269} 2270 2271define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2272 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 2273 ; GFX6: bb.1 (%ir-block.0): 2274 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2275 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2276 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2277 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2278 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2279 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2280 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2281 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2282 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2283 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2284 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2285 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2286 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2287 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2288 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2289 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2290 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2291 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2292 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2293 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2294 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2295 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2296 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2297 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2298 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2299 ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub12 2300 ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2301 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2302 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2303 ; GFX6: $vgpr0 = COPY [[COPY5]] 2304 ; GFX6: $vgpr1 = COPY [[COPY6]] 2305 ; GFX6: $vgpr2 = COPY [[COPY7]] 2306 ; GFX6: $vgpr3 = COPY [[COPY8]] 2307 ; GFX6: $vgpr4 = COPY [[COPY9]] 2308 ; GFX6: $vgpr5 = COPY [[COPY10]] 2309 ; GFX6: $vgpr6 = COPY [[COPY11]] 2310 ; GFX6: $vgpr7 = COPY [[COPY12]] 2311 ; GFX6: $vgpr8 = COPY [[COPY13]] 2312 ; GFX6: $vgpr9 = COPY [[COPY14]] 2313 ; GFX6: $vgpr10 = COPY [[COPY15]] 2314 ; GFX6: $vgpr11 = COPY [[COPY16]] 2315 ; GFX6: $vgpr12 = COPY [[COPY17]] 2316 ; GFX6: $vgpr13 = COPY [[COPY18]] 2317 ; GFX6: $vgpr14 = COPY [[COPY19]] 2318 ; GFX6: $vgpr15 = COPY [[COPY20]] 2319 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2320 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 2321 ; GFX7: bb.1 (%ir-block.0): 2322 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2323 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2324 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2325 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2326 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2327 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2328 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2329 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2330 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant 
load 16, align 4) 2331 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2332 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2333 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2334 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2335 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2336 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2337 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2338 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2339 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2340 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2341 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2342 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2343 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2344 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2345 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2346 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2347 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2348 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2349 ; GFX7: 
[[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2350 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2351 ; GFX7: $vgpr0 = COPY [[COPY5]] 2352 ; GFX7: $vgpr1 = COPY [[COPY6]] 2353 ; GFX7: $vgpr2 = COPY [[COPY7]] 2354 ; GFX7: $vgpr3 = COPY [[COPY8]] 2355 ; GFX7: $vgpr4 = COPY [[COPY9]] 2356 ; GFX7: $vgpr5 = COPY [[COPY10]] 2357 ; GFX7: $vgpr6 = COPY [[COPY11]] 2358 ; GFX7: $vgpr7 = COPY [[COPY12]] 2359 ; GFX7: $vgpr8 = COPY [[COPY13]] 2360 ; GFX7: $vgpr9 = COPY [[COPY14]] 2361 ; GFX7: $vgpr10 = COPY [[COPY15]] 2362 ; GFX7: $vgpr11 = COPY [[COPY16]] 2363 ; GFX7: $vgpr12 = COPY [[COPY17]] 2364 ; GFX7: $vgpr13 = COPY [[COPY18]] 2365 ; GFX7: $vgpr14 = COPY [[COPY19]] 2366 ; GFX7: $vgpr15 = COPY [[COPY20]] 2367 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2368 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 2369 ; GFX8: bb.1 (%ir-block.0): 2370 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2371 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2372 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2373 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2374 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2375 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2376 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2377 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2378 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2379 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN 
[[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2380 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2381 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2382 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2383 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2384 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2385 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2386 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2387 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2388 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2389 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2390 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2391 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2392 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2393 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2394 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2395 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2396 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2397 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2398 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub15 2399 ; GFX8: $vgpr0 = COPY [[COPY5]] 2400 ; GFX8: $vgpr1 = COPY [[COPY6]] 2401 ; GFX8: $vgpr2 = COPY [[COPY7]] 2402 ; GFX8: $vgpr3 = COPY [[COPY8]] 2403 ; GFX8: $vgpr4 = COPY [[COPY9]] 2404 ; GFX8: $vgpr5 = COPY [[COPY10]] 2405 ; GFX8: $vgpr6 = COPY [[COPY11]] 2406 ; GFX8: $vgpr7 = COPY [[COPY12]] 2407 ; GFX8: $vgpr8 = COPY [[COPY13]] 2408 ; GFX8: $vgpr9 = COPY [[COPY14]] 2409 ; GFX8: $vgpr10 = COPY [[COPY15]] 2410 ; GFX8: $vgpr11 = COPY [[COPY16]] 2411 ; GFX8: $vgpr12 = COPY [[COPY17]] 2412 ; GFX8: $vgpr13 = COPY [[COPY18]] 2413 ; GFX8: $vgpr14 = COPY [[COPY19]] 2414 ; GFX8: $vgpr15 = COPY [[COPY20]] 2415 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2416 %soffset = add i32 %soffset.base, 4032 2417 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2418 ret <16 x float> %val 2419} 2420 2421define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { 2422 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 2423 ; GFX6: bb.1 (%ir-block.0): 2424 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2425 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2426 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2427 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2428 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2429 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2430 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2431 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 2432 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2433 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2434 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2435 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2436 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2437 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2438 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2439 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2440 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2441 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2442 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2443 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2444 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2445 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2446 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2447 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2448 ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2449 ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2450 ; 
GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2451 ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2452 ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2453 ; GFX6: $vgpr0 = COPY [[COPY5]] 2454 ; GFX6: $vgpr1 = COPY [[COPY6]] 2455 ; GFX6: $vgpr2 = COPY [[COPY7]] 2456 ; GFX6: $vgpr3 = COPY [[COPY8]] 2457 ; GFX6: $vgpr4 = COPY [[COPY9]] 2458 ; GFX6: $vgpr5 = COPY [[COPY10]] 2459 ; GFX6: $vgpr6 = COPY [[COPY11]] 2460 ; GFX6: $vgpr7 = COPY [[COPY12]] 2461 ; GFX6: $vgpr8 = COPY [[COPY13]] 2462 ; GFX6: $vgpr9 = COPY [[COPY14]] 2463 ; GFX6: $vgpr10 = COPY [[COPY15]] 2464 ; GFX6: $vgpr11 = COPY [[COPY16]] 2465 ; GFX6: $vgpr12 = COPY [[COPY17]] 2466 ; GFX6: $vgpr13 = COPY [[COPY18]] 2467 ; GFX6: $vgpr14 = COPY [[COPY19]] 2468 ; GFX6: $vgpr15 = COPY [[COPY20]] 2469 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2470 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 2471 ; GFX7: bb.1 (%ir-block.0): 2472 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2473 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2474 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2475 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2476 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2477 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2478 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2479 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 2480 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2481 ; GFX7: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2482 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2483 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2484 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2485 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2486 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2487 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2488 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2489 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2490 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2491 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2492 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2493 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2494 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2495 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2496 ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2497 ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2498 ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2499 ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub14 2500 ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2501 ; GFX7: $vgpr0 = COPY [[COPY5]] 2502 ; GFX7: $vgpr1 = COPY [[COPY6]] 2503 ; GFX7: $vgpr2 = COPY [[COPY7]] 2504 ; GFX7: $vgpr3 = COPY [[COPY8]] 2505 ; GFX7: $vgpr4 = COPY [[COPY9]] 2506 ; GFX7: $vgpr5 = COPY [[COPY10]] 2507 ; GFX7: $vgpr6 = COPY [[COPY11]] 2508 ; GFX7: $vgpr7 = COPY [[COPY12]] 2509 ; GFX7: $vgpr8 = COPY [[COPY13]] 2510 ; GFX7: $vgpr9 = COPY [[COPY14]] 2511 ; GFX7: $vgpr10 = COPY [[COPY15]] 2512 ; GFX7: $vgpr11 = COPY [[COPY16]] 2513 ; GFX7: $vgpr12 = COPY [[COPY17]] 2514 ; GFX7: $vgpr13 = COPY [[COPY18]] 2515 ; GFX7: $vgpr14 = COPY [[COPY19]] 2516 ; GFX7: $vgpr15 = COPY [[COPY20]] 2517 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2518 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 2519 ; GFX8: bb.1 (%ir-block.0): 2520 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 2521 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 2522 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 2523 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 2524 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 2525 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2526 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2527 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 2528 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2529 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 2530 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4) 2531 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4) 2532 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 2533 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2534 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2535 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 2536 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 2537 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 2538 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 2539 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 2540 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 2541 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 2542 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 2543 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 2544 ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 2545 ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 2546 ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 2547 ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 2548 ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 2549 ; GFX8: $vgpr0 = 
COPY [[COPY5]] 2550 ; GFX8: $vgpr1 = COPY [[COPY6]] 2551 ; GFX8: $vgpr2 = COPY [[COPY7]] 2552 ; GFX8: $vgpr3 = COPY [[COPY8]] 2553 ; GFX8: $vgpr4 = COPY [[COPY9]] 2554 ; GFX8: $vgpr5 = COPY [[COPY10]] 2555 ; GFX8: $vgpr6 = COPY [[COPY11]] 2556 ; GFX8: $vgpr7 = COPY [[COPY12]] 2557 ; GFX8: $vgpr8 = COPY [[COPY13]] 2558 ; GFX8: $vgpr9 = COPY [[COPY14]] 2559 ; GFX8: $vgpr10 = COPY [[COPY15]] 2560 ; GFX8: $vgpr11 = COPY [[COPY16]] 2561 ; GFX8: $vgpr12 = COPY [[COPY17]] 2562 ; GFX8: $vgpr13 = COPY [[COPY18]] 2563 ; GFX8: $vgpr14 = COPY [[COPY19]] 2564 ; GFX8: $vgpr15 = COPY [[COPY20]] 2565 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 2566 %soffset = add i32 %soffset.base, 4036 2567 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2568 ret <16 x float> %val 2569} 2570 2571; Waterfall loop due to resource being VGPR 2572define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { 2573 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc 2574 ; GFX6: bb.1 (%ir-block.0): 2575 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2576 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2577 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2578 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2579 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2580 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2581 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2582 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 2583 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2584 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2585 ; GFX6: 
[[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2586 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2587 ; GFX6: bb.2: 2588 ; GFX6: successors: %bb.3, %bb.2 2589 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2590 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2591 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2592 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2593 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2594 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2595 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2596 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2597 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2598 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2599 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2600 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2601 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2602 ; GFX6: 
S_CBRANCH_EXECNZ %bb.2, implicit $exec 2603 ; GFX6: bb.3: 2604 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2605 ; GFX6: bb.4: 2606 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2607 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2608 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc 2609 ; GFX7: bb.1 (%ir-block.0): 2610 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2611 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2612 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2613 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2614 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2615 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2616 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2617 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 2618 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2619 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2620 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2621 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2622 ; GFX7: bb.2: 2623 ; GFX7: successors: %bb.3, %bb.2 2624 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2625 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2626 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2627 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2628 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2629 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2630 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2631 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2632 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2633 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2634 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2635 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2636 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2637 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2638 ; GFX7: bb.3: 2639 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2640 ; GFX7: bb.4: 2641 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2642 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2643 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc 2644 ; GFX8: bb.1 (%ir-block.0): 2645 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2646 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2647 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2648 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2649 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2650 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2651 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2652 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 2653 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2654 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub0_sub1 2655 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2656 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2657 ; GFX8: bb.2: 2658 ; GFX8: successors: %bb.3, %bb.2 2659 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2660 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2661 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2662 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2663 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2664 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2665 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2666 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2667 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2668 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2669 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2670 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2671 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], 
implicit-def $scc 2672 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2673 ; GFX8: bb.3: 2674 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2675 ; GFX8: bb.4: 2676 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2677 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2678 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2679 ret float %val 2680} 2681 2682; Use the offset inside the waterfall loop 2683define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { 2684 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 2685 ; GFX6: bb.1 (%ir-block.0): 2686 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2687 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2688 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2689 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2690 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2691 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2692 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2693 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2694 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2695 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2696 ; GFX6: bb.2: 2697 ; GFX6: successors: %bb.3, %bb.2 2698 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2699 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2700 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2701 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 2702 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub0, implicit $exec 2703 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2704 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2705 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 2706 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2707 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2708 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2709 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2710 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2711 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2712 ; GFX6: bb.3: 2713 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2714 ; GFX6: bb.4: 2715 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2716 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2717 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 2718 ; GFX7: bb.1 (%ir-block.0): 2719 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2720 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2721 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2722 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2723 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2724 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2725 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 2726 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2727 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2728 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2729 ; GFX7: bb.2: 2730 ; GFX7: successors: %bb.3, %bb.2 2731 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2732 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2733 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2734 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 2735 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2736 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2737 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2738 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 2739 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2740 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2741 ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2742 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit 
$exec 2743 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2744 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2745 ; GFX7: bb.3: 2746 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2747 ; GFX7: bb.4: 2748 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2749 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2750 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 2751 ; GFX8: bb.1 (%ir-block.0): 2752 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2753 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2754 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2755 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2756 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2757 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2758 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2759 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2760 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2761 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2762 ; GFX8: bb.2: 2763 ; GFX8: successors: %bb.3, %bb.2 2764 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2765 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2766 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2767 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 2768 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2769 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2770 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2771 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 2772 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2773 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2774 ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2775 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2776 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2777 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2778 ; GFX8: bb.3: 2779 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2780 ; GFX8: bb.4: 2781 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2782 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2783 %soffset = add i32 %soffset.base, 4092 2784 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2785 ret float %val 2786} 2787 2788; Scalar offset exceeds MUBUF limit, keep add out of the loop 2789define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { 2790 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 2791 ; GFX6: bb.1 (%ir-block.0): 2792 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2793 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2794 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2795 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2796 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2797 ; GFX6: 
[[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2798 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2799 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2800 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 2801 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 2802 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2803 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2804 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2805 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2806 ; GFX6: bb.2: 2807 ; GFX6: successors: %bb.3, %bb.2 2808 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2809 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2810 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2811 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2812 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2813 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2814 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2815 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2816 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2817 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2818 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2819 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2820 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2821 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2822 ; GFX6: bb.3: 2823 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2824 ; GFX6: bb.4: 2825 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2826 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2827 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 2828 ; GFX7: bb.1 (%ir-block.0): 2829 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2830 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2831 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2832 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2833 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2834 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2835 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2836 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2837 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 2838 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 2839 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2840 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2841 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2842 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2843 ; GFX7: bb.2: 2844 ; GFX7: successors: %bb.3, %bb.2 2845 ; GFX7: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2846 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2847 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2848 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2849 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2850 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2851 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2852 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2853 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2854 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2855 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2856 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2857 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2858 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2859 ; GFX7: bb.3: 2860 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2861 ; GFX7: bb.4: 2862 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2863 ; GFX7: 
SI_RETURN_TO_EPILOG implicit $vgpr0 2864 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 2865 ; GFX8: bb.1 (%ir-block.0): 2866 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2867 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2868 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2869 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2870 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2871 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 2872 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2873 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2874 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 2875 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 2876 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2877 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2878 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2879 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2880 ; GFX8: bb.2: 2881 ; GFX8: successors: %bb.3, %bb.2 2882 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 2883 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 2884 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2885 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 2886 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 2887 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 2888 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2889 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 2890 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2891 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2892 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 2893 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2894 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2895 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2896 ; GFX8: bb.3: 2897 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2898 ; GFX8: bb.4: 2899 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2900 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 2901 %soffset = add i32 %soffset.base, 4096 2902 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2903 ret float %val 2904} 2905 2906; Waterfall loop, but constant offset 2907define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { 2908 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 2909 ; GFX6: bb.1 (%ir-block.0): 2910 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2911 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2912 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2913 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2914 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2915 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2916 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2917 ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2918 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2919 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2920 ; GFX6: bb.2: 2921 ; GFX6: successors: %bb.3, %bb.2 2922 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 2923 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 2924 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2925 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 2926 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2927 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2928 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2929 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 2930 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2931 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2932 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1) 2933 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2934 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2935 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2936 ; GFX6: bb.3: 2937 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2938 ; GFX6: bb.4: 2939 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2940 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 2941 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 2942 ; GFX7: bb.1 (%ir-block.0): 2943 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2944 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2945 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2946 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2947 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2948 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2949 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2950 ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2951 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2952 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2953 ; GFX7: bb.2: 2954 ; GFX7: successors: %bb.3, %bb.2 2955 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 2956 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 2957 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2958 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 2959 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2960 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, 
implicit $exec 2961 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2962 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 2963 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2964 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2965 ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1) 2966 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2967 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2968 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2969 ; GFX7: bb.3: 2970 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2971 ; GFX7: bb.4: 2972 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 2973 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 2974 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 2975 ; GFX8: bb.1 (%ir-block.0): 2976 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2977 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2978 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2979 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2980 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2981 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 2982 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 2983 ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 2984 
; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 2985 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2986 ; GFX8: bb.2: 2987 ; GFX8: successors: %bb.3, %bb.2 2988 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 2989 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 2990 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2991 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 2992 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 2993 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 2994 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2995 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 2996 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2997 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2998 ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1) 2999 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3000 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3001 
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3002 ; GFX8: bb.3: 3003 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3004 ; GFX8: bb.4: 3005 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 3006 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 3007 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) 3008 ret float %val 3009} 3010 3011; Waterfall loop, but constant offset 3012define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { 3013 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 3014 ; GFX6: bb.1 (%ir-block.0): 3015 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 3016 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3017 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3018 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3019 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3020 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3021 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3022 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 3023 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3024 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3025 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3026 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3027 ; GFX6: bb.2: 3028 ; GFX6: successors: %bb.3, %bb.2 3029 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3030 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3031 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3032 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3033 ; GFX6: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3034 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3035 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3036 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3037 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3038 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3039 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 3040 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3041 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3042 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3043 ; GFX6: bb.3: 3044 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3045 ; GFX6: bb.4: 3046 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 3047 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 3048 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 3049 ; GFX7: bb.1 (%ir-block.0): 3050 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 3051 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3052 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3053 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3054 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3055 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3056 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3057 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 3058 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3059 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3060 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3061 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3062 ; GFX7: bb.2: 3063 ; GFX7: successors: %bb.3, %bb.2 3064 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3065 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3066 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3067 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3068 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3069 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3070 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3071 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3072 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3073 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3074 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load 4) 3075 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3076 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3077 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3078 ; GFX7: bb.3: 3079 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3080 ; GFX7: bb.4: 3081 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 3082 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 3083 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 3084 ; GFX8: bb.1 (%ir-block.0): 3085 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 3086 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3087 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3088 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3089 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3090 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3091 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 3092 ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3093 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3094 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3095 ; GFX8: bb.2: 3096 ; GFX8: successors: %bb.3, %bb.2 3097 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 3098 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 3099 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3100 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 3101 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, 
implicit $exec 3102 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3103 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3104 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 3105 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3106 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3107 ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4096) 3108 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3109 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3110 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3111 ; GFX8: bb.3: 3112 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3113 ; GFX8: bb.4: 3114 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 3115 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 3116 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) 3117 ret float %val 3118} 3119 3120; Need a waterfall loop, but the offset is scalar. 3121; Make sure the base offset is added to each split load. 
3122define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { 3123 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 3124 ; GFX6: bb.1 (%ir-block.0): 3125 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3126 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3127 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3128 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3129 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3130 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3131 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3132 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3133 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3134 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3135 ; GFX6: bb.2: 3136 ; GFX6: successors: %bb.3, %bb.2 3137 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3138 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3139 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3140 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3141 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3142 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3143 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3144 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3145 ; GFX6: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3146 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3147 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3148 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3149 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3150 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3151 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3152 ; GFX6: bb.3: 3153 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3154 ; GFX6: bb.4: 3155 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 3156 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3157 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3158 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3159 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3160 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3161 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3162 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3163 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3164 ; GFX6: $vgpr0 = COPY [[COPY7]] 3165 ; GFX6: $vgpr1 = COPY [[COPY8]] 3166 ; GFX6: $vgpr2 = COPY [[COPY9]] 3167 ; 
GFX6: $vgpr3 = COPY [[COPY10]] 3168 ; GFX6: $vgpr4 = COPY [[COPY11]] 3169 ; GFX6: $vgpr5 = COPY [[COPY12]] 3170 ; GFX6: $vgpr6 = COPY [[COPY13]] 3171 ; GFX6: $vgpr7 = COPY [[COPY14]] 3172 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3173 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 3174 ; GFX7: bb.1 (%ir-block.0): 3175 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3176 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3177 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3178 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3179 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3180 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3181 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3182 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3183 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3184 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3185 ; GFX7: bb.2: 3186 ; GFX7: successors: %bb.3, %bb.2 3187 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3188 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3189 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3190 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3191 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3192 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3193 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3194 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3195 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3196 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3197 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3198 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3199 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3200 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3201 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3202 ; GFX7: bb.3: 3203 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3204 ; GFX7: bb.4: 3205 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 3206 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3207 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3208 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3209 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3210 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3211 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3212 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE4]].sub6 3213 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3214 ; GFX7: $vgpr0 = COPY [[COPY7]] 3215 ; GFX7: $vgpr1 = COPY [[COPY8]] 3216 ; GFX7: $vgpr2 = COPY [[COPY9]] 3217 ; GFX7: $vgpr3 = COPY [[COPY10]] 3218 ; GFX7: $vgpr4 = COPY [[COPY11]] 3219 ; GFX7: $vgpr5 = COPY [[COPY12]] 3220 ; GFX7: $vgpr6 = COPY [[COPY13]] 3221 ; GFX7: $vgpr7 = COPY [[COPY14]] 3222 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3223 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 3224 ; GFX8: bb.1 (%ir-block.0): 3225 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3226 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3227 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3228 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3229 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3230 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3231 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3232 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3233 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3234 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3235 ; GFX8: bb.2: 3236 ; GFX8: successors: %bb.3, %bb.2 3237 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3238 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3239 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3240 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3241 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3242 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3243 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3244 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3245 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3246 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3247 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3248 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3249 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3250 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3251 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3252 ; GFX8: bb.3: 3253 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3254 ; GFX8: bb.4: 3255 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 3256 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3257 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3258 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3259 ; GFX8: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3260 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3261 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3262 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3263 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3264 ; GFX8: $vgpr0 = COPY [[COPY7]] 3265 ; GFX8: $vgpr1 = COPY [[COPY8]] 3266 ; GFX8: $vgpr2 = COPY [[COPY9]] 3267 ; GFX8: $vgpr3 = COPY [[COPY10]] 3268 ; GFX8: $vgpr4 = COPY [[COPY11]] 3269 ; GFX8: $vgpr5 = COPY [[COPY12]] 3270 ; GFX8: $vgpr6 = COPY [[COPY13]] 3271 ; GFX8: $vgpr7 = COPY [[COPY14]] 3272 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3273 %soffset = add i32 %soffset.base, 4064 3274 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3275 ret <8 x float> %val 3276} 3277 3278; Need a waterfall loop, but the offset is scalar. 
3279; Make sure the maximum offset isn't exeeded when splitting this 3280define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { 3281 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 3282 ; GFX6: bb.1 (%ir-block.0): 3283 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3284 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3285 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3286 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3287 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3288 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3289 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3290 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 3291 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3292 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3293 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3294 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3295 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3296 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3297 ; GFX6: bb.2: 3298 ; GFX6: successors: %bb.3, %bb.2 3299 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3300 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3301 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3302 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3303 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3304 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3305 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3306 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3307 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3308 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3309 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3310 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3311 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3312 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3313 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3314 ; GFX6: bb.3: 3315 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3316 ; GFX6: bb.4: 3317 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3318 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3319 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3320 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3321 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3322 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE4]].sub4 3323 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3324 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3325 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3326 ; GFX6: $vgpr0 = COPY [[COPY8]] 3327 ; GFX6: $vgpr1 = COPY [[COPY9]] 3328 ; GFX6: $vgpr2 = COPY [[COPY10]] 3329 ; GFX6: $vgpr3 = COPY [[COPY11]] 3330 ; GFX6: $vgpr4 = COPY [[COPY12]] 3331 ; GFX6: $vgpr5 = COPY [[COPY13]] 3332 ; GFX6: $vgpr6 = COPY [[COPY14]] 3333 ; GFX6: $vgpr7 = COPY [[COPY15]] 3334 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3335 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 3336 ; GFX7: bb.1 (%ir-block.0): 3337 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3338 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3339 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3340 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3341 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3342 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3343 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3344 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 3345 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3346 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3347 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3348 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3349 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3350 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3351 ; GFX7: bb.2: 3352 ; GFX7: successors: %bb.3, %bb.2 3353 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3354 ; GFX7: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3355 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3356 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3357 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3358 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3359 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3360 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3361 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3362 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3363 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3364 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3365 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3366 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3367 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3368 ; GFX7: bb.3: 3369 ; GFX7: $exec = S_MOV_B64_term 
[[S_MOV_B64_term]] 3370 ; GFX7: bb.4: 3371 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3372 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3373 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3374 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3375 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3376 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3377 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3378 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3379 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3380 ; GFX7: $vgpr0 = COPY [[COPY8]] 3381 ; GFX7: $vgpr1 = COPY [[COPY9]] 3382 ; GFX7: $vgpr2 = COPY [[COPY10]] 3383 ; GFX7: $vgpr3 = COPY [[COPY11]] 3384 ; GFX7: $vgpr4 = COPY [[COPY12]] 3385 ; GFX7: $vgpr5 = COPY [[COPY13]] 3386 ; GFX7: $vgpr6 = COPY [[COPY14]] 3387 ; GFX7: $vgpr7 = COPY [[COPY15]] 3388 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3389 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 3390 ; GFX8: bb.1 (%ir-block.0): 3391 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3392 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3393 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3394 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3395 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3396 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3397 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3398 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 3399 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3400 ; 
GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3401 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3402 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3403 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3404 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3405 ; GFX8: bb.2: 3406 ; GFX8: successors: %bb.3, %bb.2 3407 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3408 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3409 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3410 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3411 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3412 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3413 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3414 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3415 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3416 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3417 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3418 ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3419 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3420 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3421 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3422 ; GFX8: bb.3: 3423 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3424 ; GFX8: bb.4: 3425 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3426 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3427 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3428 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3429 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3430 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3431 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3432 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3433 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3434 ; GFX8: $vgpr0 = COPY [[COPY8]] 3435 ; GFX8: $vgpr1 = COPY [[COPY9]] 3436 ; GFX8: $vgpr2 = COPY [[COPY10]] 3437 ; GFX8: $vgpr3 = COPY [[COPY11]] 3438 ; GFX8: $vgpr4 = COPY [[COPY12]] 3439 ; GFX8: $vgpr5 = COPY [[COPY13]] 3440 ; GFX8: $vgpr6 = COPY [[COPY14]] 3441 ; GFX8: $vgpr7 = COPY [[COPY15]] 3442 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3443 %soffset = add i32 %soffset.base, 4068 3444 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3445 ret <8 x float> %val 3446} 3447 3448define 
amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { 3449 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 3450 ; GFX6: bb.1 (%ir-block.0): 3451 ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3452 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3453 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3454 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3455 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3456 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3457 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3458 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3459 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3460 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3461 ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3462 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3463 ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3464 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3465 ; GFX6: bb.2: 3466 ; GFX6: successors: %bb.3, %bb.2 3467 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3468 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3469 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3470 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3471 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3472 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3473 ; GFX6: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3474 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3475 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3476 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3477 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3478 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3479 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3480 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3481 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3482 ; GFX6: bb.3: 3483 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3484 ; GFX6: bb.4: 3485 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3486 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3487 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3488 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3489 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3490 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3491 ; GFX6: 
[[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3492 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3493 ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3494 ; GFX6: $vgpr0 = COPY [[COPY8]] 3495 ; GFX6: $vgpr1 = COPY [[COPY9]] 3496 ; GFX6: $vgpr2 = COPY [[COPY10]] 3497 ; GFX6: $vgpr3 = COPY [[COPY11]] 3498 ; GFX6: $vgpr4 = COPY [[COPY12]] 3499 ; GFX6: $vgpr5 = COPY [[COPY13]] 3500 ; GFX6: $vgpr6 = COPY [[COPY14]] 3501 ; GFX6: $vgpr7 = COPY [[COPY15]] 3502 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3503 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 3504 ; GFX7: bb.1 (%ir-block.0): 3505 ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3506 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3507 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3508 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3509 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3510 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3511 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3512 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3513 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3514 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] 3515 ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3516 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3517 ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3518 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3519 ; GFX7: bb.2: 3520 ; GFX7: successors: %bb.3, %bb.2 3521 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3522 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3523 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3524 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3525 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3526 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3527 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3528 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3529 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3530 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3531 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3532 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3533 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3534 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3535 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3536 ; GFX7: bb.3: 3537 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3538 ; GFX7: bb.4: 3539 ; 
GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3540 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3541 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3542 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3543 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3544 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3545 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3546 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3547 ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3548 ; GFX7: $vgpr0 = COPY [[COPY8]] 3549 ; GFX7: $vgpr1 = COPY [[COPY9]] 3550 ; GFX7: $vgpr2 = COPY [[COPY10]] 3551 ; GFX7: $vgpr3 = COPY [[COPY11]] 3552 ; GFX7: $vgpr4 = COPY [[COPY12]] 3553 ; GFX7: $vgpr5 = COPY [[COPY13]] 3554 ; GFX7: $vgpr6 = COPY [[COPY14]] 3555 ; GFX7: $vgpr7 = COPY [[COPY15]] 3556 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3557 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 3558 ; GFX8: bb.1 (%ir-block.0): 3559 ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 3560 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3561 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3562 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3563 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3564 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3565 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3566 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 3567 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc 3568 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
[[S_ADD_I32_]] 3569 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 3570 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3571 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3572 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3573 ; GFX8: bb.2: 3574 ; GFX8: successors: %bb.3, %bb.2 3575 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3576 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3577 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3578 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec 3579 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 3580 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 3581 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3582 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec 3583 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3584 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3585 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3586 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN 
[[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3587 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3588 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3589 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3590 ; GFX8: bb.3: 3591 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3592 ; GFX8: bb.4: 3593 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3594 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3595 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3596 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3597 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3598 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3599 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3600 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3601 ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3602 ; GFX8: $vgpr0 = COPY [[COPY8]] 3603 ; GFX8: $vgpr1 = COPY [[COPY9]] 3604 ; GFX8: $vgpr2 = COPY [[COPY10]] 3605 ; GFX8: $vgpr3 = COPY [[COPY11]] 3606 ; GFX8: $vgpr4 = COPY [[COPY12]] 3607 ; GFX8: $vgpr5 = COPY [[COPY13]] 3608 ; GFX8: $vgpr6 = COPY [[COPY14]] 3609 ; GFX8: $vgpr7 = COPY [[COPY15]] 3610 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3611 %soffset = add i32 %soffset.base, 4096 3612 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3613 ret <8 x float> %val 3614} 3615 3616define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x 
i32> %rsrc, i32 %offset.base) { 3617 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 3618 ; GFX6: bb.1 (%ir-block.0): 3619 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3620 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3621 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3622 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3623 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3624 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3625 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3626 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 3627 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3628 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3629 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3630 ; GFX6: bb.2: 3631 ; GFX6: successors: %bb.3, %bb.2 3632 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3633 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3634 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3635 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3636 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3637 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3638 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3639 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3640 ; GFX6: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3641 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3642 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3643 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3644 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3645 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3646 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3647 ; GFX6: bb.3: 3648 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3649 ; GFX6: bb.4: 3650 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3651 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3652 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3653 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3654 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3655 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3656 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3657 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3658 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3659 ; GFX6: $vgpr0 = COPY [[COPY7]] 3660 ; GFX6: $vgpr1 = COPY [[COPY8]] 3661 ; GFX6: $vgpr2 = COPY 
[[COPY9]] 3662 ; GFX6: $vgpr3 = COPY [[COPY10]] 3663 ; GFX6: $vgpr4 = COPY [[COPY11]] 3664 ; GFX6: $vgpr5 = COPY [[COPY12]] 3665 ; GFX6: $vgpr6 = COPY [[COPY13]] 3666 ; GFX6: $vgpr7 = COPY [[COPY14]] 3667 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3668 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 3669 ; GFX7: bb.1 (%ir-block.0): 3670 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3671 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3672 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3673 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3674 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3675 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3676 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3677 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 3678 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3679 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3680 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3681 ; GFX7: bb.2: 3682 ; GFX7: successors: %bb.3, %bb.2 3683 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3684 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3685 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3686 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3687 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3688 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub1, implicit $exec 3689 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3690 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3691 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3692 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3693 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3694 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3695 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3696 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3697 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3698 ; GFX7: bb.3: 3699 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3700 ; GFX7: bb.4: 3701 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3702 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3703 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3704 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3705 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3706 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub4 3707 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3708 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3709 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3710 ; GFX7: $vgpr0 = COPY [[COPY7]] 3711 ; GFX7: $vgpr1 = COPY [[COPY8]] 3712 ; GFX7: $vgpr2 = COPY [[COPY9]] 3713 ; GFX7: $vgpr3 = COPY [[COPY10]] 3714 ; GFX7: $vgpr4 = COPY [[COPY11]] 3715 ; GFX7: $vgpr5 = COPY [[COPY12]] 3716 ; GFX7: $vgpr6 = COPY [[COPY13]] 3717 ; GFX7: $vgpr7 = COPY [[COPY14]] 3718 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3719 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 3720 ; GFX8: bb.1 (%ir-block.0): 3721 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3722 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3723 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3724 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3725 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3726 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3727 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3728 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 3729 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3730 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3731 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3732 ; GFX8: bb.2: 3733 ; GFX8: successors: %bb.3, %bb.2 3734 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3735 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3736 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 3737 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3738 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3739 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3740 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3741 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3742 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3743 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3744 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3745 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3746 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3747 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3748 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3749 ; GFX8: bb.3: 3750 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3751 ; GFX8: bb.4: 3752 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3753 ; GFX8: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3754 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3755 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3756 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3757 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3758 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3759 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3760 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3761 ; GFX8: $vgpr0 = COPY [[COPY7]] 3762 ; GFX8: $vgpr1 = COPY [[COPY8]] 3763 ; GFX8: $vgpr2 = COPY [[COPY9]] 3764 ; GFX8: $vgpr3 = COPY [[COPY10]] 3765 ; GFX8: $vgpr4 = COPY [[COPY11]] 3766 ; GFX8: $vgpr5 = COPY [[COPY12]] 3767 ; GFX8: $vgpr6 = COPY [[COPY13]] 3768 ; GFX8: $vgpr7 = COPY [[COPY14]] 3769 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3770 %soffset = add i32 %offset.base, 5000 3771 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3772 ret <8 x float> %val 3773} 3774 3775define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { 3776 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 3777 ; GFX6: bb.1 (%ir-block.0): 3778 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3779 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3780 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3781 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3782 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3783 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3784 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3785 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 3786 ; GFX6: 
[[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3787 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3788 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3789 ; GFX6: bb.2: 3790 ; GFX6: successors: %bb.3, %bb.2 3791 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3792 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3793 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3794 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3795 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3796 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3797 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3798 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3799 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3800 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3801 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3802 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load 16, align 4) 3803 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3804 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3805 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3806 ; GFX6: bb.3: 3807 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3808 ; GFX6: bb.4: 3809 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3810 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3811 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3812 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3813 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3814 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3815 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3816 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3817 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3818 ; GFX6: $vgpr0 = COPY [[COPY7]] 3819 ; GFX6: $vgpr1 = COPY [[COPY8]] 3820 ; GFX6: $vgpr2 = COPY [[COPY9]] 3821 ; GFX6: $vgpr3 = COPY [[COPY10]] 3822 ; GFX6: $vgpr4 = COPY [[COPY11]] 3823 ; GFX6: $vgpr5 = COPY [[COPY12]] 3824 ; GFX6: $vgpr6 = COPY [[COPY13]] 3825 ; GFX6: $vgpr7 = COPY [[COPY14]] 3826 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3827 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 3828 ; GFX7: bb.1 (%ir-block.0): 3829 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3830 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3831 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3832 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3833 ; GFX7: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3834 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3835 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3836 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 3837 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3838 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3839 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3840 ; GFX7: bb.2: 3841 ; GFX7: successors: %bb.3, %bb.2 3842 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3843 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3844 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3845 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3846 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3847 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3848 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3849 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3850 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3851 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3852 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3853 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3854 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3855 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3856 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3857 ; GFX7: bb.3: 3858 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3859 ; GFX7: bb.4: 3860 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3861 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3862 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3863 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3864 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3865 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3866 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3867 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3868 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3869 ; GFX7: $vgpr0 = COPY [[COPY7]] 3870 ; GFX7: $vgpr1 = COPY [[COPY8]] 3871 ; GFX7: $vgpr2 = COPY [[COPY9]] 3872 ; GFX7: $vgpr3 = COPY [[COPY10]] 3873 ; GFX7: $vgpr4 = COPY [[COPY11]] 3874 ; GFX7: $vgpr5 = COPY [[COPY12]] 3875 ; GFX7: $vgpr6 = COPY [[COPY13]] 3876 ; GFX7: $vgpr7 = COPY [[COPY14]] 3877 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3878 ; GFX8-LABEL: name: 
s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 3879 ; GFX8: bb.1 (%ir-block.0): 3880 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3881 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3882 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3883 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3884 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3885 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3886 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3887 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 3888 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3889 ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3890 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3891 ; GFX8: bb.2: 3892 ; GFX8: successors: %bb.3, %bb.2 3893 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3894 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3895 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3896 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3897 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3898 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3899 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3900 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3901 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc 3902 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3903 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3904 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3905 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3906 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3907 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3908 ; GFX8: bb.3: 3909 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3910 ; GFX8: bb.4: 3911 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3912 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3913 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3914 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3915 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3916 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3917 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3918 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3919 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3920 ; GFX8: $vgpr0 = COPY [[COPY7]] 3921 ; GFX8: $vgpr1 = COPY [[COPY8]] 3922 ; GFX8: $vgpr2 = COPY [[COPY9]] 3923 ; GFX8: $vgpr3 = COPY [[COPY10]] 3924 ; GFX8: $vgpr4 
= COPY [[COPY11]] 3925 ; GFX8: $vgpr5 = COPY [[COPY12]] 3926 ; GFX8: $vgpr6 = COPY [[COPY13]] 3927 ; GFX8: $vgpr7 = COPY [[COPY14]] 3928 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3929 %soffset = add i32 %offset.base, 4076 3930 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 3931 ret <8 x float> %val 3932} 3933 3934define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { 3935 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 3936 ; GFX6: bb.1 (%ir-block.0): 3937 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3938 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3939 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3940 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3941 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3942 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3943 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3944 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 3945 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3946 ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3947 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3948 ; GFX6: bb.2: 3949 ; GFX6: successors: %bb.3, %bb.2 3950 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 3951 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 3952 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 3953 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[REG_SEQUENCE1]], [[COPY5]], implicit $exec 3954 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 3955 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 3956 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 3957 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 3958 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 3959 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 3960 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3961 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 3962 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 3963 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 3964 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 3965 ; GFX6: bb.3: 3966 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 3967 ; GFX6: bb.4: 3968 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 3969 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 3970 ; GFX6: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 3971 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 3972 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 3973 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 3974 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 3975 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 3976 ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 3977 ; GFX6: $vgpr0 = COPY [[COPY7]] 3978 ; GFX6: $vgpr1 = COPY [[COPY8]] 3979 ; GFX6: $vgpr2 = COPY [[COPY9]] 3980 ; GFX6: $vgpr3 = COPY [[COPY10]] 3981 ; GFX6: $vgpr4 = COPY [[COPY11]] 3982 ; GFX6: $vgpr5 = COPY [[COPY12]] 3983 ; GFX6: $vgpr6 = COPY [[COPY13]] 3984 ; GFX6: $vgpr7 = COPY [[COPY14]] 3985 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 3986 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 3987 ; GFX7: bb.1 (%ir-block.0): 3988 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 3989 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3990 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3991 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3992 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 3993 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 3994 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 3995 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 3996 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 3997 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 3998 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 3999 ; GFX7: bb.2: 4000 ; GFX7: successors: %bb.3, %bb.2 4001 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4002 ; 
GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4003 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4004 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 4005 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 4006 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 4007 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4008 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 4009 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4010 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4011 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4012 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4013 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4014 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4015 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4016 ; GFX7: bb.3: 4017 ; GFX7: $exec = 
S_MOV_B64_term [[S_MOV_B64_term]] 4018 ; GFX7: bb.4: 4019 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 4020 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4021 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4022 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4023 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4024 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4025 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4026 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4027 ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4028 ; GFX7: $vgpr0 = COPY [[COPY7]] 4029 ; GFX7: $vgpr1 = COPY [[COPY8]] 4030 ; GFX7: $vgpr2 = COPY [[COPY9]] 4031 ; GFX7: $vgpr3 = COPY [[COPY10]] 4032 ; GFX7: $vgpr4 = COPY [[COPY11]] 4033 ; GFX7: $vgpr5 = COPY [[COPY12]] 4034 ; GFX7: $vgpr6 = COPY [[COPY13]] 4035 ; GFX7: $vgpr7 = COPY [[COPY14]] 4036 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4037 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 4038 ; GFX8: bb.1 (%ir-block.0): 4039 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 4040 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4041 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4042 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4043 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4044 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 4045 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4046 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 4047 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4048 ; 
GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4049 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4050 ; GFX8: bb.2: 4051 ; GFX8: successors: %bb.3, %bb.2 4052 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4053 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4054 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4055 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec 4056 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec 4057 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec 4058 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4059 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec 4060 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4061 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4062 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4063 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) 4064 ; GFX8: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4065 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4066 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4067 ; GFX8: bb.3: 4068 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4069 ; GFX8: bb.4: 4070 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 4071 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4072 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4073 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4074 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4075 ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4076 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4077 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4078 ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4079 ; GFX8: $vgpr0 = COPY [[COPY7]] 4080 ; GFX8: $vgpr1 = COPY [[COPY8]] 4081 ; GFX8: $vgpr2 = COPY [[COPY9]] 4082 ; GFX8: $vgpr3 = COPY [[COPY10]] 4083 ; GFX8: $vgpr4 = COPY [[COPY11]] 4084 ; GFX8: $vgpr5 = COPY [[COPY12]] 4085 ; GFX8: $vgpr6 = COPY [[COPY13]] 4086 ; GFX8: $vgpr7 = COPY [[COPY14]] 4087 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4088 %soffset = add i32 %offset.base, 4080 4089 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 4090 ret <8 x float> %val 4091} 4092 
; Pure immediate soffset (4064) with a fully-VGPR rsrc: the rsrc is uniformized
; through a V_READFIRSTLANE waterfall loop, and the 32-byte result is split into
; two 16-byte BUFFER_LOAD_DWORDX4 loads.
4093define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { 4094 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 4095 ; GFX6: bb.1 
(%ir-block.0): 4096 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 4097 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4098 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4099 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4100 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4101 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4102 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4103 ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4104 ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4105 ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4106 ; GFX6: bb.2: 4107 ; GFX6: successors: %bb.3, %bb.2 4108 ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 4109 ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 4110 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4111 ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 4112 ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4113 ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4114 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4115 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 4116 ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4117 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4118 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4119 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4120 ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4121 ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4122 ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4123 ; GFX6: bb.3: 4124 ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4125 ; GFX6: bb.4: 4126 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 4127 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4128 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4129 ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4130 ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4131 ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4132 ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4133 ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4134 ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4135 ; GFX6: $vgpr0 = COPY [[COPY6]] 4136 ; GFX6: $vgpr1 = COPY [[COPY7]] 4137 ; GFX6: $vgpr2 = COPY [[COPY8]] 4138 ; GFX6: $vgpr3 = COPY [[COPY9]] 4139 ; GFX6: $vgpr4 = COPY [[COPY10]] 4140 ; GFX6: $vgpr5 = COPY [[COPY11]] 4141 ; GFX6: $vgpr6 = COPY [[COPY12]] 4142 ; GFX6: $vgpr7 = COPY [[COPY13]] 4143 ; GFX6: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4144 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 4145 ; GFX7: bb.1 (%ir-block.0): 4146 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 4147 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4148 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4149 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4150 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4151 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4152 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4153 ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4154 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4155 ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4156 ; GFX7: bb.2: 4157 ; GFX7: successors: %bb.3, %bb.2 4158 ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 4159 ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 4160 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4161 ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 4162 ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4163 ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 4164 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4165 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], 
[[COPY5]], implicit $exec 4166 ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4167 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4168 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4169 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4170 ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4171 ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4172 ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4173 ; GFX7: bb.3: 4174 ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4175 ; GFX7: bb.4: 4176 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 4177 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4178 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4179 ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4180 ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4181 ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4182 ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4183 ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4184 ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4185 ; GFX7: $vgpr0 = COPY [[COPY6]] 4186 ; GFX7: $vgpr1 = 
COPY [[COPY7]] 4187 ; GFX7: $vgpr2 = COPY [[COPY8]] 4188 ; GFX7: $vgpr3 = COPY [[COPY9]] 4189 ; GFX7: $vgpr4 = COPY [[COPY10]] 4190 ; GFX7: $vgpr5 = COPY [[COPY11]] 4191 ; GFX7: $vgpr6 = COPY [[COPY12]] 4192 ; GFX7: $vgpr7 = COPY [[COPY13]] 4193 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4194 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 4195 ; GFX8: bb.1 (%ir-block.0): 4196 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 4197 ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4198 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 4199 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 4200 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 4201 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4202 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4203 ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 4204 ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 4205 ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 4206 ; GFX8: bb.2: 4207 ; GFX8: successors: %bb.3, %bb.2 4208 ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec 4209 ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec 4210 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 4211 ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec 4212 ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec 4213 ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec 
4214 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 4215 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec 4216 ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 4217 ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 4218 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4219 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4) 4220 ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 4221 ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 4222 ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec 4223 ; GFX8: bb.3: 4224 ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 4225 ; GFX8: bb.4: 4226 ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 4227 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 4228 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 4229 ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 4230 ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 4231 ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 4232 ; GFX8: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 4233 ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 4234 ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 4235 ; GFX8: $vgpr0 = COPY [[COPY6]] 4236 ; GFX8: $vgpr1 = COPY [[COPY7]] 4237 ; GFX8: $vgpr2 = COPY [[COPY8]] 4238 ; GFX8: $vgpr3 = COPY [[COPY9]] 4239 ; GFX8: $vgpr4 = COPY [[COPY10]] 4240 ; GFX8: $vgpr5 = COPY [[COPY11]] 4241 ; GFX8: $vgpr6 = COPY [[COPY12]] 4242 ; GFX8: $vgpr7 = COPY [[COPY13]] 4243 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 4244 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) 4245 ret <8 x float> %val 4246} 4247 
; Offset is (VGPR + SGPR): selected as BUFFER_LOAD_DWORD_OFFEN with the VGPR
; half in the voffset operand and the SGPR half in the soffset operand, so no
; explicit add instruction is emitted.
4248define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4249 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 4250 ; GFX6: bb.1 (%ir-block.0): 4251 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4252 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4253 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4254 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4255 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4256 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4257 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4258 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4259 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4260 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4261 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4262 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 4263 ; GFX7: bb.1 (%ir-block.0): 4264 ; GFX7: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4265 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4266 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4267 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4268 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4269 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4270 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4271 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4272 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4273 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4274 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4275 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 4276 ; GFX8: bb.1 (%ir-block.0): 4277 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4278 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4279 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4280 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4281 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4282 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4283 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4284 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4285 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4286 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4287 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4288 %offset = add i32 %offset.v, %offset.s 4289 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4290 ret float %val 4291} 4292 
; Same as the vgpr_sgpr case above but with the add operands commuted
; (SGPR + VGPR); the selected BUFFER_LOAD_DWORD_OFFEN is identical.
4293define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32>
inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4294 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 4295 ; GFX6: bb.1 (%ir-block.0): 4296 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4297 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4298 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4299 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4300 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4301 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4302 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4303 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4304 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4305 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4306 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4307 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 4308 ; GFX7: bb.1 (%ir-block.0): 4309 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4310 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4311 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4312 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4313 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4314 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4315 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4316 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4317 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4318 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4319 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4320 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 
4321 ; GFX8: bb.1 (%ir-block.0): 4322 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4323 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4324 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4325 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4326 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4327 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4328 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4329 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4330 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4331 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4332 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4333 %offset = add i32 %offset.s, %offset.v 4334 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4335 ret float %val 4336} 4337 
; Offset is (VGPR + SGPR) + 1024: the VGPR+SGPR part becomes a V_ADD_I32 into
; voffset (the SGPR is copied to a VGPR first), and the trailing 1024 folds
; into the instruction's immediate offset field.
4338define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4339 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 4340 ; GFX6: bb.1 (%ir-block.0): 4341 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4342 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4343 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4344 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4345 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4346 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4347 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4348 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4349 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4350 ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4351 ; GFX6: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4352 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4353 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4354 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4355 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 4356 ; GFX7: bb.1 (%ir-block.0): 4357 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4358 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4359 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4360 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4361 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4362 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4363 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4364 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4365 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4366 ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4367 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4368 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4369 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4370 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4371 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 4372 ; GFX8: bb.1 (%ir-block.0): 4373 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4374 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4375 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4376 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4377 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4378 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4379 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4380 
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4381 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4382 ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4383 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4384 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4385 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4386 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4387 %offset.base = add i32 %offset.v, %offset.s 4388 %offset = add i32 %offset.base, 1024 4389 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4390 ret float %val 4391} 4392 
; Offset is (SGPR + VGPR) + 1024: same as the previous test with the inner add
; operands commuted; the 1024 still folds into the immediate offset field.
4393define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4394 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 4395 ; GFX6: bb.1 (%ir-block.0): 4396 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4397 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4398 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4399 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4400 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4401 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4402 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4403 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4404 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4405 ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec 4406 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4407 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4408 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4409 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4410 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 4411 ; GFX7: bb.1 (%ir-block.0): 4412 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4413 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4414 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4415 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4416 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4417 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4418 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4419 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4420 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4421 ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec 4422 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4423 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4424 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4425 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4426 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 4427 ; GFX8: bb.1 (%ir-block.0): 4428 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4429 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4430 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4431 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4432 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4433 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4434 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4435 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 4436 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] 4437 ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec 4438 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 4439 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4440 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4441 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4442 %offset.base = add i32 %offset.s, %offset.v 4443 %offset = add i32 %offset.base, 1024 4444 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4445 ret float %val 4446} 4447 4448; TODO: Ideally this would be reassociated to fold. 4449define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4450 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 4451 ; GFX6: bb.1 (%ir-block.0): 4452 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4453 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4454 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4455 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4456 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4457 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4458 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4459 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4460 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4461 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 4462 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4463 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4464 
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4465 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 4466 ; GFX7: bb.1 (%ir-block.0): 4467 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4468 ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4469 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4470 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4471 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4472 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4473 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4474 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4475 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4476 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 4477 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4478 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4479 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4480 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 4481 ; GFX8: bb.1 (%ir-block.0): 4482 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4483 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4484 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4485 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4486 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4487 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4488 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4489 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4490 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4491 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc 4492 ; GFX8: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4493 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4494 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4495 %offset.base = add i32 %offset.s, 1024 4496 %offset = add i32 %offset.base, %offset.v 4497 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4498 ret float %val 4499} 4500 4501define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 4502 ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 4503 ; GFX6: bb.1 (%ir-block.0): 4504 ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4505 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4506 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4507 ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4508 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4509 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4510 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4511 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4512 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4513 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 4514 ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4515 ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4516 ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4517 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 4518 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 4519 ; GFX7: bb.1 (%ir-block.0): 4520 ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4521 ; GFX7: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4522 ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4523 ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4524 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4525 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4526 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4527 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4528 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4529 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 4530 ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4531 ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) 4532 ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4533 ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 4534 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 4535 ; GFX8: bb.1 (%ir-block.0): 4536 ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 4537 ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 4538 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 4539 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 4540 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 4541 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 4542 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 4543 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 4544 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 4545 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 4546 ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec 4547 ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load 4) 4548 ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 4549 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 4550 %offset.base = add i32 %offset.v, 1024 4551 %offset = add i32 %offset.base, %offset.s 4552 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 4553 ret float %val 4554} 4555 4556declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) 4557declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) 4558declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg) 4559declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) 4560declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg) 4561declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg) 4562 4563declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) 4564declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg) 4565declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg) 4566declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg) 4567declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg) 4568declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg) 4569 4570declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg) 4571declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg) 4572declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg) 4573 4574declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg) 4575declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) 4576 4577declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) 4578declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) 4579 4580declare <4 x i8 addrspace(1)*> 
@llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) 4581declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) 4582