1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -amdgpu-global-isel-new-legality -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s 3; RUN: llc -amdgpu-global-isel-new-legality -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s --check-prefix=GREEDY 4 5; Natural mapping 6define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 7 ; CHECK-LABEL: name: s_buffer_load_i32 8 ; CHECK: bb.1 (%ir-block.0): 9 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 10 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 11 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 12 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 13 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 14 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 15 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 16 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) 17 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) 18 ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 19 ; CHECK: $sgpr0 = COPY [[INT]](s32) 20 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 21 ; GREEDY-LABEL: name: s_buffer_load_i32 22 ; GREEDY: bb.1 (%ir-block.0): 23 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 24 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 25 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 26 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 27 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 28 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 29 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 30 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) 31 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) 32 ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 33 ; GREEDY: $sgpr0 = COPY [[INT]](s32) 34 ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0 35 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 36 ret i32 %val 37} 38 39define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 40 ; CHECK-LABEL: name: s_buffer_load_v2i32 41 ; CHECK: bb.1 (%ir-block.0): 42 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 43 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 44 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 45 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 46 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 47 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 48 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 49 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) 50 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) 51 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 52 ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 53 ; CHECK: $sgpr0 = COPY [[INT]](s32) 54 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 55 ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 56 ; CHECK: $sgpr1 = COPY [[INT1]](s32) 57 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 58 ; GREEDY-LABEL: name: s_buffer_load_v2i32 59 ; GREEDY: bb.1 (%ir-block.0): 60 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 61 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 62 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 63 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 64 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 65 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 66 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 67 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) 68 ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) 69 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 70 ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 71 ; GREEDY: $sgpr0 = COPY [[INT]](s32) 72 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 73 ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 74 ; GREEDY: $sgpr1 = COPY [[INT1]](s32) 75 ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 76 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 77 ret <2 x i32> %val 78} 79 80define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 81 ; CHECK-LABEL: name: s_buffer_load_v3i32 82 ; CHECK: bb.1 (%ir-block.0): 83 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 84 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 85 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 86 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 87 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 88 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 89 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 90 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) 91 ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF 92 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) 93 ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) 94 ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) 95 ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) 96 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) 97 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 98 ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 99 ; CHECK: $sgpr0 = COPY [[INT]](s32) 100 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 101 ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 102 ; CHECK: $sgpr1 = COPY [[INT1]](s32) 103 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) 104 ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) 105 ; CHECK: $sgpr2 = COPY [[INT2]](s32) 106 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 107 ; GREEDY-LABEL: name: s_buffer_load_v3i32 108 ; GREEDY: bb.1 (%ir-block.0): 109 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 110 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 111 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 112 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 113 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 114 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 115 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 116 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) 117 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF 118 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) 119 ; GREEDY: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) 120 ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) 121 ; GREEDY: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) 122 ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) 123 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 124 ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 125 ; GREEDY: $sgpr0 = COPY [[INT]](s32) 126 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 127 ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 128 ; GREEDY: $sgpr1 = COPY [[INT1]](s32) 129 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) 130 ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) 131 ; GREEDY: $sgpr2 = COPY [[INT2]](s32) 132 ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 133 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 134 ret <3 x i32> %val 135} 136 137define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 138 ; CHECK-LABEL: name: s_buffer_load_v8i32 139 ; CHECK: bb.1 (%ir-block.0): 140 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 141 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 142 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 143 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 144 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 145 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 146 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 147 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) 148 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) 149 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 150 ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 151 ; CHECK: $sgpr0 = COPY [[INT]](s32) 152 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 153 ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 154 ; CHECK: $sgpr1 = COPY [[INT1]](s32) 155 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) 156 ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) 157 ; CHECK: $sgpr2 = COPY [[INT2]](s32) 158 ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) 159 ; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) 160 ; CHECK: $sgpr3 = COPY [[INT3]](s32) 161 ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) 162 ; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) 163 ; CHECK: $sgpr4 = COPY [[INT4]](s32) 164 ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) 165 ; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) 166 ; CHECK: $sgpr5 = COPY [[INT5]](s32) 167 ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) 168 ; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) 169 ; CHECK: $sgpr6 = COPY [[INT6]](s32) 170 ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) 171 ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) 172 ; CHECK: $sgpr7 = COPY [[INT7]](s32) 173 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 174 ; GREEDY-LABEL: name: s_buffer_load_v8i32 175 ; GREEDY: bb.1 (%ir-block.0): 176 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 177 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 178 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 179 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 180 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 181 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 182 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 183 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) 184 ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) 185 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 186 ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 187 ; GREEDY: $sgpr0 = COPY [[INT]](s32) 188 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 189 ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 190 ; GREEDY: $sgpr1 = COPY [[INT1]](s32) 191 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) 192 ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) 193 ; GREEDY: $sgpr2 = COPY [[INT2]](s32) 194 ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) 195 ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) 196 ; GREEDY: $sgpr3 = COPY [[INT3]](s32) 197 ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) 198 ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) 199 ; GREEDY: $sgpr4 = COPY [[INT4]](s32) 200 ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) 201 ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) 202 ; GREEDY: $sgpr5 = COPY [[INT5]](s32) 203 ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) 204 ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) 205 ; GREEDY: $sgpr6 = COPY [[INT6]](s32) 206 ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) 207 ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) 208 ; GREEDY: $sgpr7 = COPY [[INT7]](s32) 209 ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 210 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 211 ret <8 x i32> %val 212} 213 214define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { 215 ; CHECK-LABEL: name: s_buffer_load_v16i32 216 ; CHECK: bb.1 (%ir-block.0): 217 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 218 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 219 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 220 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 221 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 222 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 223 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 224 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) 225 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) 226 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 227 ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 228 ; CHECK: $sgpr0 = COPY [[INT]](s32) 229 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 230 ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 231 ; CHECK: $sgpr1 = COPY [[INT1]](s32) 232 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) 233 ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) 234 ; CHECK: $sgpr2 = COPY [[INT2]](s32) 235 ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) 236 ; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) 237 ; CHECK: $sgpr3 = COPY [[INT3]](s32) 238 ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) 239 ; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) 240 ; CHECK: $sgpr4 = COPY [[INT4]](s32) 241 ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) 242 ; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) 243 ; CHECK: $sgpr5 = COPY [[INT5]](s32) 244 ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) 245 ; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) 246 ; CHECK: $sgpr6 = COPY [[INT6]](s32) 247 ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) 248 ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) 249 ; CHECK: $sgpr7 = COPY [[INT7]](s32) 250 ; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) 251 ; CHECK: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) 252 ; CHECK: $sgpr8 = COPY [[INT8]](s32) 253 ; CHECK: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) 254 ; CHECK: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) 255 ; CHECK: $sgpr9 = COPY [[INT9]](s32) 256 ; CHECK: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) 257 ; CHECK: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) 258 ; CHECK: $sgpr10 = COPY [[INT10]](s32) 259 ; CHECK: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) 260 ; CHECK: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) 261 ; CHECK: $sgpr11 = COPY [[INT11]](s32) 262 ; CHECK: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) 263 ; CHECK: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) 264 ; CHECK: $sgpr12 = COPY [[INT12]](s32) 265 ; CHECK: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) 266 ; CHECK: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) 267 ; CHECK: $sgpr13 = COPY [[INT13]](s32) 268 ; CHECK: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) 269 ; CHECK: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) 270 ; CHECK: $sgpr14 = COPY [[INT14]](s32) 271 ; CHECK: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) 272 ; CHECK: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) 273 ; CHECK: $sgpr15 = COPY [[INT15]](s32) 274 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 275 ; GREEDY-LABEL: name: s_buffer_load_v16i32 276 ; GREEDY: bb.1 (%ir-block.0): 277 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 278 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 279 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 280 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 281 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 282 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 283 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 284 ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) 285 ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) 286 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) 287 ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) 288 ; GREEDY: $sgpr0 = COPY [[INT]](s32) 289 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) 290 ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) 291 ; GREEDY: $sgpr1 = COPY [[INT1]](s32) 292 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) 293 ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) 294 ; GREEDY: $sgpr2 = COPY [[INT2]](s32) 295 ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) 296 ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) 297 ; GREEDY: $sgpr3 = COPY [[INT3]](s32) 298 ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) 299 ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) 300 ; GREEDY: $sgpr4 = COPY [[INT4]](s32) 301 ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) 302 ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) 303 ; GREEDY: $sgpr5 = COPY [[INT5]](s32) 304 ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) 305 ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) 306 ; GREEDY: $sgpr6 = COPY [[INT6]](s32) 307 ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) 308 ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) 309 ; GREEDY: $sgpr7 = COPY [[INT7]](s32) 310 ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) 311 ; GREEDY: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) 312 ; GREEDY: $sgpr8 = COPY [[INT8]](s32) 313 ; GREEDY: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) 314 ; GREEDY: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) 315 ; GREEDY: $sgpr9 = COPY [[INT9]](s32) 316 ; GREEDY: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) 317 ; GREEDY: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) 318 ; GREEDY: $sgpr10 = COPY [[INT10]](s32) 319 ; GREEDY: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) 320 ; GREEDY: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) 321 ; GREEDY: $sgpr11 = COPY [[INT11]](s32) 322 ; GREEDY: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) 323 ; GREEDY: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) 324 ; GREEDY: $sgpr12 = COPY [[INT12]](s32) 325 ; GREEDY: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) 326 ; GREEDY: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) 327 ; GREEDY: $sgpr13 = COPY [[INT13]](s32) 328 ; GREEDY: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) 329 ; GREEDY: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) 330 ; GREEDY: $sgpr14 = COPY [[INT14]](s32) 331 ; GREEDY: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) 332 ; GREEDY: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) 333 ; GREEDY: $sgpr15 = COPY [[INT15]](s32) 334 ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 335 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) 336 ret <16 x i32> %val 337} 338 339; Check cases that need to be converted to MUBUF due to the offset being a VGPR. 340define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 341 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset 342 ; CHECK: bb.1 (%ir-block.0): 343 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 344 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 345 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 346 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 347 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 348 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 349 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 350 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 351 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 352 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) 353 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 354 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 355 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset 356 ; GREEDY: bb.1 (%ir-block.0): 357 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 358 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 359 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 360 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 361 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 362 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 363 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 364 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 365 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 366 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) 367 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 368 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 369 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 370 ret float %val 371} 372 373define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 374 ; CHECK-LABEL: name: s_buffer_load_v2f32_vgpr_offset 375 ; CHECK: bb.1 (%ir-block.0): 376 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 377 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 378 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 379 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 380 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 381 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 382 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 383 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 384 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 385 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) 386 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) 387 ; CHECK: $vgpr0 = COPY [[UV]](s32) 388 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 389 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 390 ; GREEDY-LABEL: name: s_buffer_load_v2f32_vgpr_offset 391 ; GREEDY: bb.1 (%ir-block.0): 392 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 393 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 394 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 395 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 396 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 397 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 398 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 399 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 400 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 401 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) 402 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) 403 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 404 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 405 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 406 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 407 ret <2 x float> %val 408} 409 410define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 411 ; CHECK-LABEL: name: s_buffer_load_v3f32_vgpr_offset 412 ; CHECK: bb.1 (%ir-block.0): 413 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 414 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 415 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 416 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 417 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 418 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 419 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 420 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 421 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 422 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 423 ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF 424 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) 425 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) 426 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) 427 ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) 428 ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) 429 ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) 430 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) 431 ; CHECK: $vgpr0 = COPY [[UV]](s32) 432 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 433 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 434 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 435 ; GREEDY-LABEL: name: s_buffer_load_v3f32_vgpr_offset 436 ; GREEDY: bb.1 (%ir-block.0): 437 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 438 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 439 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 440 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 441 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 442 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 443 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 444 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 445 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 446 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 447 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF 448 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) 449 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) 450 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) 451 ; GREEDY: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) 452 ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) 453 ; GREEDY: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) 454 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) 455 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 456 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 457 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 458 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 459 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 460 ret <3 x float> %val 461} 462 463define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 464 ; CHECK-LABEL: name: s_buffer_load_v4f32_vgpr_offset 465 ; CHECK: bb.1 (%ir-block.0): 466 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 467 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 468 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 469 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 470 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 471 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 472 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 473 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 474 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 475 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 476 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) 477 ; CHECK: $vgpr0 = COPY [[UV]](s32) 478 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 479 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 480 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 481 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 482 ; GREEDY-LABEL: name: s_buffer_load_v4f32_vgpr_offset 483 ; GREEDY: bb.1 (%ir-block.0): 484 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 485 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 486 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 487 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 488 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 489 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 490 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 491 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 492 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 493 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 494 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) 495 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 496 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 497 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 498 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 499 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 500 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 501 ret <4 x float> %val 502} 503 504define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 505 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset 506 ; CHECK: bb.1 (%ir-block.0): 507 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 508 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 509 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 510 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 511 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 512 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 513 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 514 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 515 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 516 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 517 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 518 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 519 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 520 ; CHECK: $vgpr0 = COPY [[UV]](s32) 521 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 522 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 523 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 524 ; CHECK: $vgpr4 = COPY [[UV4]](s32) 525 ; CHECK: $vgpr5 = COPY [[UV5]](s32) 526 ; CHECK: $vgpr6 = COPY [[UV6]](s32) 527 ; CHECK: $vgpr7 = COPY [[UV7]](s32) 528 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 529 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset 530 ; GREEDY: bb.1 (%ir-block.0): 531 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 532 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 533 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 534 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 535 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 536 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 537 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 538 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 539 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 540 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 541 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 542 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 543 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 544 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 545 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 546 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 547 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 548 ; GREEDY: $vgpr4 = COPY [[UV4]](s32) 549 ; GREEDY: $vgpr5 = COPY [[UV5]](s32) 550 ; GREEDY: $vgpr6 = COPY [[UV6]](s32) 551 ; GREEDY: $vgpr7 = COPY [[UV7]](s32) 552 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 553 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 554 ret <8 x float> %val 555} 556 557define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 558 ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset 559 ; CHECK: bb.1 (%ir-block.0): 560 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 561 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 562 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 563 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 564 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 565 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 566 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 567 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 568 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 569 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 570 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 571 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 572 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 573 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) 574 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) 575 ; CHECK: $vgpr0 = COPY [[UV]](s32) 576 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 577 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 578 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 579 ; CHECK: $vgpr4 = COPY [[UV4]](s32) 580 ; CHECK: $vgpr5 = COPY [[UV5]](s32) 581 ; CHECK: $vgpr6 = COPY [[UV6]](s32) 582 ; CHECK: $vgpr7 = COPY [[UV7]](s32) 583 ; CHECK: $vgpr8 = COPY [[UV8]](s32) 584 ; CHECK: $vgpr9 = COPY [[UV9]](s32) 585 ; CHECK: $vgpr10 = COPY [[UV10]](s32) 586 ; CHECK: $vgpr11 = COPY [[UV11]](s32) 587 ; CHECK: $vgpr12 = COPY [[UV12]](s32) 588 ; CHECK: $vgpr13 = COPY [[UV13]](s32) 589 ; CHECK: $vgpr14 = COPY [[UV14]](s32) 590 ; CHECK: $vgpr15 = COPY [[UV15]](s32) 591 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 592 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset 593 ; GREEDY: bb.1 (%ir-block.0): 594 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 595 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 596 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 597 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 598 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 599 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 600 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 601 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 602 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 603 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 604 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 605 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 606 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 607 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) 608 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) 609 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 610 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 611 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 612 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 613 ; GREEDY: $vgpr4 = COPY [[UV4]](s32) 614 ; GREEDY: $vgpr5 = COPY [[UV5]](s32) 615 ; GREEDY: $vgpr6 = COPY [[UV6]](s32) 616 ; GREEDY: $vgpr7 = COPY [[UV7]](s32) 617 ; GREEDY: $vgpr8 = COPY [[UV8]](s32) 618 ; GREEDY: $vgpr9 = COPY [[UV9]](s32) 619 ; GREEDY: $vgpr10 = COPY [[UV10]](s32) 620 ; GREEDY: $vgpr11 = COPY [[UV11]](s32) 621 ; GREEDY: $vgpr12 = COPY [[UV12]](s32) 622 ; GREEDY: $vgpr13 = COPY [[UV13]](s32) 623 ; GREEDY: $vgpr14 = COPY [[UV14]](s32) 624 ; GREEDY: $vgpr15 = COPY [[UV15]](s32) 625 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 626 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 627 ret <16 x float> %val 628} 629 630define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 631 ; CHECK-LABEL: name: s_buffer_load_i96_vgpr_offset 632 ; CHECK: bb.1 (%ir-block.0): 633 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 634 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 635 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 636 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 637 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 638 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 639 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 640 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 641 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 642 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 643 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 644 ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) 645 ; CHECK: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) 646 ; CHECK: S_ENDPGM 0 647 ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset 648 ; GREEDY: bb.1 (%ir-block.0): 649 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 650 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 651 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 652 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 653 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 654 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 655 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 656 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 657 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 658 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 659 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 660 ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) 661 ; GREEDY: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) 662 ; GREEDY: S_ENDPGM 0 663 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) 664 store i96 %val, i96 addrspace(1)* undef 665 ret void 666} 667 668; Test split of a wide scalar 669define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 670 ; CHECK-LABEL: name: s_buffer_load_i256_vgpr_offset 671 ; CHECK: bb.1 (%ir-block.0): 672 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 673 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 674 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 675 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 676 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 677 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 678 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 679 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 680 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 681 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 682 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 683 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 684 ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) 685 ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) 686 ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i256 addrspace(1)* undef`, align 8, addrspace 1) 687 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 688 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 689 ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) 690 ; CHECK: S_ENDPGM 0 691 ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset 692 ; GREEDY: bb.1 (%ir-block.0): 693 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 694 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 695 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 696 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 697 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 698 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 699 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 700 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 701 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 702 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 703 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 704 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 705 ; GREEDY: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) 706 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) 707 ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i256 addrspace(1)* undef`, align 8, addrspace 1) 708 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 709 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 710 ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) 711 ; GREEDY: S_ENDPGM 0 712 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) 713 store i256 %val, i256 addrspace(1)* undef 714 ret void 715} 716 717; Test split of a wide scalar 718define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 719 ; CHECK-LABEL: name: s_buffer_load_i512_vgpr_offset 720 ; CHECK: bb.1 (%ir-block.0): 721 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 722 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 723 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 724 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 725 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 726 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 727 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 728 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 729 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 730 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 731 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 732 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 733 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 734 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 735 ; CHECK: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) 736 ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) 737 ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i512 addrspace(1)* undef`, align 8, addrspace 1) 738 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 739 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 740 ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) 741 ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 742 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 743 ; CHECK: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) 744 ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 745 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 746 ; CHECK: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) 747 ; CHECK: S_ENDPGM 0 748 ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset 749 ; GREEDY: bb.1 (%ir-block.0): 750 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 751 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 752 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 753 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 754 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 755 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 756 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 757 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 758 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 759 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 760 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 761 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 762 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 763 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 764 ; GREEDY: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) 765 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) 766 ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i512 addrspace(1)* undef`, align 8, addrspace 1) 767 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 768 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 769 ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) 770 ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 771 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 772 ; GREEDY: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) 773 ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 774 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 775 ; GREEDY: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) 776 ; GREEDY: S_ENDPGM 0 777 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) 778 store i512 %val, i512 addrspace(1)* undef 779 ret void 780} 781 782; Test split of a vector with 16-bit elements 783define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 784 ; CHECK-LABEL: name: s_buffer_load_v16i16_vgpr_offset 785 ; CHECK: bb.1 (%ir-block.0): 786 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 787 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 788 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 789 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 790 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 791 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 792 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 793 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 794 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 795 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 796 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 797 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 798 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) 799 ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) 800 ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) 801 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 802 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 803 ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) 804 ; CHECK: S_ENDPGM 0 805 ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset 806 ; GREEDY: bb.1 (%ir-block.0): 807 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 808 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 809 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 810 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 811 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 812 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 813 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 814 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 815 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 816 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 817 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 818 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 819 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) 820 ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) 821 ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) 822 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 823 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 824 ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) 825 ; GREEDY: S_ENDPGM 0 826 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) 827 store <16 x i16> %val, <16 x i16> addrspace(1)* undef 828 ret void 829} 830 831; Test split of a vector with 16-bit elements 832define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 833 ; CHECK-LABEL: name: s_buffer_load_v32i16_vgpr_offset 834 ; CHECK: bb.1 (%ir-block.0): 835 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 836 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 837 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 838 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 839 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 840 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 841 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 842 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 843 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 844 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 845 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 846 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 847 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 848 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 849 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) 850 ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) 851 ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) 852 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 853 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 854 ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) 855 ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 856 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 857 ; CHECK: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) 858 ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 859 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 860 ; CHECK: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) 861 ; CHECK: S_ENDPGM 0 862 ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset 863 ; GREEDY: bb.1 (%ir-block.0): 864 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 865 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 866 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 867 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 868 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 869 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 870 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 871 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 872 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 873 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 874 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 875 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 876 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 877 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 878 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) 879 ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) 880 ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) 881 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 882 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 883 ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) 884 ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 885 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 886 ; GREEDY: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) 887 ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 888 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 889 ; GREEDY: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) 890 ; GREEDY: S_ENDPGM 0 891 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) 892 store <32 x i16> %val, <32 x i16> addrspace(1)* undef 893 ret void 894} 895 896; Test split of a vector with 64-bit elements 897define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 898 ; CHECK-LABEL: name: s_buffer_load_v4i64_vgpr_offset 899 ; CHECK: bb.1 (%ir-block.0): 900 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 901 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 902 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 903 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 904 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 905 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 906 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 907 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 908 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 909 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 910 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 911 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 912 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) 913 ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) 914 ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) 915 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 916 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 917 ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) 918 ; CHECK: S_ENDPGM 0 919 ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset 920 ; GREEDY: bb.1 (%ir-block.0): 921 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 922 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 923 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 924 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 925 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 926 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 927 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 928 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 929 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 930 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 931 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 932 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 933 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) 934 ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) 935 ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) 936 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 937 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 938 ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) 939 ; GREEDY: S_ENDPGM 0 940 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) 941 store <4 x i64> %val, <4 x i64> addrspace(1)* undef 942 ret void 943} 944 945; Test split of a vector with 64-bit elements 946define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 947 ; CHECK-LABEL: name: s_buffer_load_v8i64_vgpr_offset 948 ; CHECK: bb.1 (%ir-block.0): 949 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 950 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 951 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 952 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 953 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 954 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 955 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 956 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 957 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 958 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 959 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 960 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 961 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 962 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 963 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) 964 ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) 965 ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) 966 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 967 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 968 ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) 969 ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 970 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 971 ; CHECK: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) 972 ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 973 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 974 ; CHECK: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) 975 ; CHECK: S_ENDPGM 0 976 ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset 977 ; GREEDY: bb.1 (%ir-block.0): 978 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 979 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 980 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 981 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 982 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 983 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 984 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 985 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 986 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 987 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 988 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 989 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 990 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 991 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 992 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) 993 ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) 994 ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) 995 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 996 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 997 ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) 998 ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 999 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 1000 ; GREEDY: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) 1001 ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 1002 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 1003 ; GREEDY: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) 1004 ; GREEDY: S_ENDPGM 0 1005 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) 1006 store <8 x i64> %val, <8 x i64> addrspace(1)* undef 1007 ret void 1008} 1009 1010; Test split of a vector with 64-bit pointer elements 1011define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1012 ; CHECK-LABEL: name: s_buffer_load_v4p1_vgpr_offset 1013 ; CHECK: bb.1 (%ir-block.0): 1014 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1015 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1016 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1017 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1018 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1019 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1020 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1021 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 1022 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1023 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1024 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1025 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1026 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) 1027 ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) 1028 ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) 1029 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 1030 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 1031 ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) 1032 ; CHECK: S_ENDPGM 0 1033 ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset 1034 ; GREEDY: bb.1 (%ir-block.0): 1035 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1036 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1037 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1038 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1039 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1040 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1041 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1042 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 1043 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1044 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1045 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1046 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1047 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) 1048 ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) 1049 ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) 1050 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 1051 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 1052 ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) 1053 ; GREEDY: S_ENDPGM 0 1054 %val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) 1055 store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef 1056 ret void 1057} 1058 1059; Test split of a vector with 64-bit pointer elements 1060define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { 1061 ; CHECK-LABEL: name: s_buffer_load_v8p1_vgpr_offset 1062 ; CHECK: bb.1 (%ir-block.0): 1063 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1064 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1065 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1066 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1067 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1068 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1069 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1070 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 1071 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1072 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1073 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1074 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1075 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 1076 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 1077 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) 1078 ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) 1079 ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) 1080 ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 1081 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 1082 ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) 1083 ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 1084 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 1085 ; CHECK: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) 1086 ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 1087 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 1088 ; CHECK: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) 1089 ; CHECK: S_ENDPGM 0 1090 ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset 1091 ; GREEDY: bb.1 (%ir-block.0): 1092 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1093 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1094 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1095 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1096 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1097 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1098 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1099 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF 1100 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1101 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1102 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1103 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1104 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 1105 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 1106 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) 1107 ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) 1108 ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) 1109 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 1110 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 1111 ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) 1112 ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 1113 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) 1114 ; GREEDY: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) 1115 ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 1116 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) 1117 ; GREEDY: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) 1118 ; GREEDY: S_ENDPGM 0 1119 %val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) 1120 store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef 1121 ret void 1122} 1123 1124define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1125 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1126 ; CHECK: bb.1 (%ir-block.0): 1127 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1128 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1129 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1130 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1131 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1132 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1133 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1134 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 1135 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1136 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1137 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1138 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1139 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load 4) 1140 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1141 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1142 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 1143 ; GREEDY: bb.1 (%ir-block.0): 1144 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1145 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1146 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1147 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1148 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1149 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1150 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1151 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 1152 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1153 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1154 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1155 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1156 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load 4) 1157 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1158 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1159 %soffset = add i32 %soffset.base, 4092 1160 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1161 ret float %val 1162} 1163 1164define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1165 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 1166 ; CHECK: bb.1 (%ir-block.0): 1167 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1168 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1169 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1170 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1171 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1172 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1173 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1174 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 1175 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1176 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1177 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1178 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1179 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load 4) 1180 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1181 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1182 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 1183 ; GREEDY: bb.1 (%ir-block.0): 1184 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1185 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1186 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1187 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1188 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1189 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1190 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1191 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 1192 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1193 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1194 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1195 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1196 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load 4) 1197 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1198 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1199 %soffset = add i32 %soffset.base, 4095 1200 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1201 ret float %val 1202} 1203 1204define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1205 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 1206 ; CHECK: bb.1 (%ir-block.0): 1207 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1208 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1209 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1210 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1211 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1212 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1213 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1214 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 1215 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1216 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1217 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1218 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) 1219 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1220 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1221 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 1222 ; GREEDY: bb.1 (%ir-block.0): 1223 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1224 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1225 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1226 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1227 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1228 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1229 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1230 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 1231 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1232 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1233 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1234 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) 1235 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1236 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1237 %soffset = add i32 %soffset.base, 4096 1238 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1239 ret float %val 1240} 1241 1242; Make sure the base offset is added to each split load. 1243define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1244 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 1245 ; CHECK: bb.1 (%ir-block.0): 1246 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1247 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1248 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1249 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1250 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1251 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1252 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1253 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 1254 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1255 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1256 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1257 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1258 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) 1259 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) 1260 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 1261 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 1262 ; CHECK: $vgpr0 = COPY [[UV]](s32) 1263 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 1264 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 1265 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 1266 ; CHECK: $vgpr4 = COPY [[UV4]](s32) 1267 ; CHECK: $vgpr5 = COPY [[UV5]](s32) 1268 ; CHECK: $vgpr6 = COPY [[UV6]](s32) 1269 ; CHECK: $vgpr7 = COPY [[UV7]](s32) 1270 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1271 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 1272 ; GREEDY: bb.1 (%ir-block.0): 1273 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1274 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1275 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1276 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1277 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1278 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1279 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1280 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 1281 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1282 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1283 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1284 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1285 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) 1286 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) 1287 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 1288 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 1289 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 1290 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 1291 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 1292 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 1293 ; GREEDY: $vgpr4 = COPY [[UV4]](s32) 1294 ; GREEDY: $vgpr5 = COPY [[UV5]](s32) 1295 ; GREEDY: $vgpr6 = COPY [[UV6]](s32) 1296 ; GREEDY: $vgpr7 = COPY [[UV7]](s32) 1297 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1298 %soffset = add i32 %soffset.base, 4064 1299 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1300 ret <8 x float> %val 1301} 1302 1303; Make sure the maximum offset isn't exeeded when splitting this 1304define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1305 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 1306 ; CHECK: bb.1 (%ir-block.0): 1307 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1308 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1309 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1310 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1311 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1312 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1313 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1314 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 1315 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1316 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1317 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1318 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1319 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1320 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 1321 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 1322 ; CHECK: $vgpr0 = COPY [[UV]](s32) 1323 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 1324 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 1325 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 1326 ; CHECK: $vgpr4 = COPY [[UV4]](s32) 1327 ; CHECK: $vgpr5 = COPY [[UV5]](s32) 1328 ; CHECK: $vgpr6 = COPY [[UV6]](s32) 1329 ; CHECK: $vgpr7 = COPY [[UV7]](s32) 1330 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1331 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 1332 ; GREEDY: bb.1 (%ir-block.0): 1333 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1334 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1335 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1336 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1337 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1338 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1339 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1340 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 1341 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1342 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1343 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1344 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1345 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1346 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 1347 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 1348 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 1349 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 1350 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 1351 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 1352 ; GREEDY: $vgpr4 = COPY [[UV4]](s32) 1353 ; GREEDY: $vgpr5 = COPY [[UV5]](s32) 1354 ; GREEDY: $vgpr6 = COPY [[UV6]](s32) 1355 ; GREEDY: $vgpr7 = COPY [[UV7]](s32) 1356 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1357 %soffset = add i32 %soffset.base, 4068 1358 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1359 ret <8 x float> %val 1360} 1361 1362define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1363 ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 1364 ; CHECK: bb.1 (%ir-block.0): 1365 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1366 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1367 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1368 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1369 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1370 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1371 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1372 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 1373 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1374 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1375 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1376 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1377 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load 16, align 4) 1378 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load 16, align 4) 1379 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 1380 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 1381 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) 1382 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) 1383 ; CHECK: $vgpr0 = COPY [[UV]](s32) 1384 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 1385 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 1386 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 1387 ; CHECK: $vgpr4 = COPY [[UV4]](s32) 1388 ; CHECK: $vgpr5 = COPY [[UV5]](s32) 1389 ; CHECK: $vgpr6 = COPY [[UV6]](s32) 1390 ; CHECK: $vgpr7 = COPY [[UV7]](s32) 1391 ; CHECK: $vgpr8 = COPY [[UV8]](s32) 1392 ; CHECK: $vgpr9 = COPY [[UV9]](s32) 1393 ; CHECK: $vgpr10 = COPY [[UV10]](s32) 1394 ; CHECK: $vgpr11 = COPY [[UV11]](s32) 1395 ; CHECK: $vgpr12 = COPY [[UV12]](s32) 1396 ; CHECK: $vgpr13 = COPY [[UV13]](s32) 1397 ; CHECK: $vgpr14 = COPY [[UV14]](s32) 1398 ; CHECK: $vgpr15 = COPY [[UV15]](s32) 1399 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1400 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 1401 ; GREEDY: bb.1 (%ir-block.0): 1402 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1403 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1404 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1405 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1406 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1407 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1408 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1409 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 1410 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1411 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1412 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1413 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1414 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load 16, align 4) 1415 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load 16, align 4) 1416 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 1417 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 1418 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) 1419 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) 1420 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 1421 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 1422 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 1423 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 1424 ; GREEDY: $vgpr4 = COPY [[UV4]](s32) 1425 ; GREEDY: $vgpr5 = COPY [[UV5]](s32) 1426 ; GREEDY: $vgpr6 = COPY [[UV6]](s32) 1427 ; GREEDY: $vgpr7 = COPY [[UV7]](s32) 1428 ; GREEDY: $vgpr8 = COPY [[UV8]](s32) 1429 ; GREEDY: $vgpr9 = COPY [[UV9]](s32) 1430 ; GREEDY: $vgpr10 = COPY [[UV10]](s32) 1431 ; GREEDY: $vgpr11 = COPY [[UV11]](s32) 1432 ; GREEDY: $vgpr12 = COPY [[UV12]](s32) 1433 ; GREEDY: $vgpr13 = COPY [[UV13]](s32) 1434 ; GREEDY: $vgpr14 = COPY [[UV14]](s32) 1435 ; GREEDY: $vgpr15 = COPY [[UV15]](s32) 1436 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1437 %soffset = add i32 %soffset.base, 4032 1438 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1439 ret <16 x float> %val 1440} 1441 1442define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { 1443 ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 1444 ; CHECK: bb.1 (%ir-block.0): 1445 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1446 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1447 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1448 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1449 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1450 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1451 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1452 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 1453 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1454 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1455 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1456 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1457 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1458 ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 1459 ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 1460 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) 1461 ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) 1462 ; CHECK: $vgpr0 = COPY [[UV]](s32) 1463 ; CHECK: $vgpr1 = COPY [[UV1]](s32) 1464 ; CHECK: $vgpr2 = COPY [[UV2]](s32) 1465 ; CHECK: $vgpr3 = COPY [[UV3]](s32) 1466 ; CHECK: $vgpr4 = COPY [[UV4]](s32) 1467 ; CHECK: $vgpr5 = COPY [[UV5]](s32) 1468 ; CHECK: $vgpr6 = COPY [[UV6]](s32) 1469 ; CHECK: $vgpr7 = COPY [[UV7]](s32) 1470 ; CHECK: $vgpr8 = COPY [[UV8]](s32) 1471 ; CHECK: $vgpr9 = COPY [[UV9]](s32) 1472 ; CHECK: $vgpr10 = COPY [[UV10]](s32) 1473 ; CHECK: $vgpr11 = COPY [[UV11]](s32) 1474 ; CHECK: $vgpr12 = COPY [[UV12]](s32) 1475 ; CHECK: $vgpr13 = COPY [[UV13]](s32) 1476 ; CHECK: $vgpr14 = COPY [[UV14]](s32) 1477 ; CHECK: $vgpr15 = COPY [[UV15]](s32) 1478 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1479 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 1480 ; GREEDY: bb.1 (%ir-block.0): 1481 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 1482 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1483 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 1484 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 1485 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 1486 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1487 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1488 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 1489 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1490 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 1491 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1492 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 1493 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 1494 ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 + 16, align 4) 1495 ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 + 48, align 4) 1496 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) 1497 ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) 1498 ; GREEDY: $vgpr0 = COPY [[UV]](s32) 1499 ; GREEDY: $vgpr1 = COPY [[UV1]](s32) 1500 ; GREEDY: $vgpr2 = COPY [[UV2]](s32) 1501 ; GREEDY: $vgpr3 = COPY [[UV3]](s32) 1502 ; GREEDY: $vgpr4 = COPY [[UV4]](s32) 1503 ; GREEDY: $vgpr5 = COPY [[UV5]](s32) 1504 ; GREEDY: $vgpr6 = COPY [[UV6]](s32) 1505 ; GREEDY: $vgpr7 = COPY [[UV7]](s32) 1506 ; GREEDY: $vgpr8 = COPY [[UV8]](s32) 1507 ; GREEDY: $vgpr9 = COPY [[UV9]](s32) 1508 ; GREEDY: $vgpr10 = COPY [[UV10]](s32) 1509 ; GREEDY: $vgpr11 = COPY [[UV11]](s32) 1510 ; GREEDY: $vgpr12 = COPY [[UV12]](s32) 1511 ; GREEDY: $vgpr13 = COPY [[UV13]](s32) 1512 ; GREEDY: $vgpr14 = COPY [[UV14]](s32) 1513 ; GREEDY: $vgpr15 = COPY [[UV15]](s32) 1514 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 1515 %soffset = add i32 %soffset.base, 4036 1516 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1517 ret <16 x float> %val 1518} 1519 1520; Waterfall loop due to resource being VGPR 1521define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { 1522 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc 1523 ; CHECK: bb.1 (%ir-block.0): 1524 ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1525 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1526 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1527 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1528 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1529 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1530 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1531 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) 1532 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1533 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1534 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1535 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1536 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1537 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1538 ; CHECK: bb.2: 1539 ; CHECK: successors: %bb.3, %bb.2 1540 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2 1541 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %8(s32), %bb.2 1542 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1543 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1544 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1545 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1546 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1547 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1548 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1549 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1550 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1551 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1552 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) 1553 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1554 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1555 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1556 ; CHECK: bb.3: 1557 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1558 ; CHECK: bb.4: 1559 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1560 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1561 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc 1562 ; GREEDY: bb.1 (%ir-block.0): 1563 ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1564 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1565 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1566 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1567 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1568 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1569 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1570 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) 1571 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1572 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1573 ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1574 ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1575 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1576 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1577 ; GREEDY: bb.2: 1578 ; GREEDY: successors: %bb.3, %bb.2 1579 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2 1580 ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %8(s32), %bb.2 1581 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1582 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1583 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1584 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1585 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1586 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1587 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1588 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1589 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1590 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1591 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) 1592 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1593 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1594 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1595 ; GREEDY: bb.3: 1596 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1597 ; GREEDY: bb.4: 1598 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1599 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1600 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1601 ret float %val 1602} 1603 1604; Use the offset inside the waterfall loop 1605define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { 1606 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 1607 ; CHECK: bb.1 (%ir-block.0): 1608 ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1609 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1610 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1611 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1612 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1613 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1614 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1615 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 1616 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 1617 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1618 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1619 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1620 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1621 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1622 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1623 ; CHECK: bb.2: 1624 ; CHECK: successors: %bb.3, %bb.2 1625 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2 1626 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2 1627 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1628 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1629 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1630 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1631 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1632 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1633 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1634 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1635 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1636 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1637 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load 4) 1638 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1639 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1640 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1641 ; CHECK: bb.3: 1642 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1643 ; CHECK: bb.4: 1644 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1645 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1646 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 1647 ; GREEDY: bb.1 (%ir-block.0): 1648 ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1649 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1650 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1651 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1652 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1653 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1654 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1655 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 1656 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 1657 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1658 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1659 ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1660 ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1661 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1662 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1663 ; GREEDY: bb.2: 1664 ; GREEDY: successors: %bb.3, %bb.2 1665 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2 1666 ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2 1667 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1668 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1669 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1670 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1671 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1672 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1673 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1674 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1675 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1676 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1677 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load 4) 1678 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1679 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1680 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1681 ; GREEDY: bb.3: 1682 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1683 ; GREEDY: bb.4: 1684 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1685 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1686 %soffset = add i32 %soffset.base, 4092 1687 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1688 ret float %val 1689} 1690 1691; Scalar offset exceeds MUBUF limit, keep add out of the loop 1692define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { 1693 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 1694 ; CHECK: bb.1 (%ir-block.0): 1695 ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1696 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1697 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1698 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1699 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1700 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1701 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1702 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 1703 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 1704 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 1705 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1706 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1707 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1708 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1709 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1710 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1711 ; CHECK: bb.2: 1712 ; CHECK: successors: %bb.3, %bb.2 1713 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.2 1714 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2 1715 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1716 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1717 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1718 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1719 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1720 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1721 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1722 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1723 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1724 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1725 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) 1726 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1727 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1728 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1729 ; CHECK: bb.3: 1730 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1731 ; CHECK: bb.4: 1732 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1733 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1734 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 1735 ; GREEDY: bb.1 (%ir-block.0): 1736 ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1737 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1738 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1739 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1740 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1741 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1742 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1743 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 1744 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 1745 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 1746 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1747 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1748 ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1749 ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1750 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1751 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1752 ; GREEDY: bb.2: 1753 ; GREEDY: successors: %bb.3, %bb.2 1754 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.2 1755 ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2 1756 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1757 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1758 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1759 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1760 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1761 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1762 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1763 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1764 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1765 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1766 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) 1767 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1768 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1769 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1770 ; GREEDY: bb.3: 1771 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1772 ; GREEDY: bb.4: 1773 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1774 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1775 %soffset = add i32 %soffset.base, 4096 1776 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 1777 ret float %val 1778} 1779 1780; Waterfall loop, but constant offset 1781define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { 1782 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 1783 ; CHECK: bb.1 (%ir-block.0): 1784 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 1785 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1786 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1787 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1788 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1789 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1790 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 1791 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1792 ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1793 ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1794 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1795 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1796 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1797 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1798 ; CHECK: bb.2: 1799 ; CHECK: successors: %bb.3, %bb.2 1800 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2 1801 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2 1802 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1803 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1804 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1805 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1806 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1807 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1808 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1809 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1810 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1811 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1812 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load 4 + 4095, align 1) 1813 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1814 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1815 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1816 ; CHECK: bb.3: 1817 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1818 ; CHECK: bb.4: 1819 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1820 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1821 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 1822 ; GREEDY: bb.1 (%ir-block.0): 1823 ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 1824 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1825 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1826 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1827 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1828 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1829 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 1830 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1831 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1832 ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1833 ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1834 ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1835 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1836 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1837 ; GREEDY: bb.2: 1838 ; GREEDY: successors: %bb.3, %bb.2 1839 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2 1840 ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2 1841 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1842 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1843 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1844 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1845 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1846 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1847 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1848 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1849 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1850 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1851 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load 4 + 4095, align 1) 1852 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1853 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1854 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1855 ; GREEDY: bb.3: 1856 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1857 ; GREEDY: bb.4: 1858 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1859 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1860 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) 1861 ret float %val 1862} 1863 1864; Waterfall loop, but constant offset 1865define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { 1866 ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 1867 ; CHECK: bb.1 (%ir-block.0): 1868 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 1869 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1870 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1871 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1872 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1873 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1874 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 1875 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1876 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1877 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1878 ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1879 ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1880 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1881 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1882 ; CHECK: bb.2: 1883 ; CHECK: successors: %bb.3, %bb.2 1884 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2 1885 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2 1886 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1887 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1888 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1889 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1890 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1891 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1892 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1893 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1894 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1895 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1896 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) 1897 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1898 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1899 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1900 ; CHECK: bb.3: 1901 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1902 ; CHECK: bb.4: 1903 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1904 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 1905 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 1906 ; GREEDY: bb.1 (%ir-block.0): 1907 ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 1908 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1909 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1910 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1911 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1912 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1913 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 1914 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 1915 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 1916 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1917 ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF 1918 ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1919 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1920 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1921 ; GREEDY: bb.2: 1922 ; GREEDY: successors: %bb.3, %bb.2 1923 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2 1924 ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2 1925 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1926 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1927 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1928 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1929 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1930 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1931 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1932 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1933 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1934 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1935 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) 1936 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1937 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1938 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1939 ; GREEDY: bb.3: 1940 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1941 ; GREEDY: bb.4: 1942 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 1943 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 1944 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) 1945 ret float %val 1946} 1947 1948; Need a waterfall loop, but the offset is scalar. 1949; Make sure the base offset is added to each split load. 1950define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { 1951 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 1952 ; CHECK: bb.1 (%ir-block.0): 1953 ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 1954 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 1955 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 1956 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 1957 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 1958 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 1959 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 1960 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 1961 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 1962 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1963 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 1964 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 1965 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 1966 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 1967 ; CHECK: bb.2: 1968 ; CHECK: successors: %bb.3, %bb.2 1969 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 1970 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 1971 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 1972 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 1973 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 1974 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 1975 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 1976 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1977 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 1978 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 1979 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 1980 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) 1981 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) 1982 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1983 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1984 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 1985 ; CHECK: bb.3: 1986 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 1987 ; CHECK: bb.4: 1988 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 1989 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 1990 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 1991 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 1992 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 1993 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 1994 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 1995 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 1996 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 1997 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 1998 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 1999 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 2000 ; GREEDY: bb.1 (%ir-block.0): 2001 ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2002 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2003 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2004 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2005 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2006 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2007 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2008 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 2009 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 2010 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2011 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2012 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2013 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2014 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2015 ; GREEDY: bb.2: 2016 ; GREEDY: successors: %bb.3, %bb.2 2017 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2018 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2019 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2020 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2021 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2022 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2023 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2024 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2025 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2026 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2027 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2028 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) 2029 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) 2030 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2031 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2032 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2033 ; GREEDY: bb.3: 2034 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2035 ; GREEDY: bb.4: 2036 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2037 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2038 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2039 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2040 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2041 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2042 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2043 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2044 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2045 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2046 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2047 %soffset = add i32 %soffset.base, 4064 2048 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2049 ret <8 x float> %val 2050} 2051 2052; Need a waterfall loop, but the offset is scalar. 2053; Make sure the maximum offset isn't exeeded when splitting this 2054define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { 2055 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 2056 ; CHECK: bb.1 (%ir-block.0): 2057 ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2058 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2059 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2060 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2061 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2062 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2063 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2064 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 2065 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 2066 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 2067 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2068 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2069 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2070 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2071 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2072 ; CHECK: bb.2: 2073 ; CHECK: successors: %bb.3, %bb.2 2074 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 2075 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2076 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2077 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2078 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2079 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2080 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2081 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2082 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2083 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2084 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2085 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2086 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2087 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2088 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2089 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2090 ; CHECK: bb.3: 2091 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2092 ; CHECK: bb.4: 2093 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2094 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2095 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 2096 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 2097 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 2098 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 2099 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 2100 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 2101 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 2102 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 2103 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2104 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 2105 ; GREEDY: bb.1 (%ir-block.0): 2106 ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2107 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2108 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2109 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2110 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2111 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2112 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2113 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 2114 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 2115 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 2116 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2117 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2118 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2119 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2120 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2121 ; GREEDY: bb.2: 2122 ; GREEDY: successors: %bb.3, %bb.2 2123 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 2124 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2125 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2126 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2127 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2128 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2129 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2130 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2131 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2132 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2133 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2134 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2135 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2136 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2137 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2138 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2139 ; GREEDY: bb.3: 2140 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2141 ; GREEDY: bb.4: 2142 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2143 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2144 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2145 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2146 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2147 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2148 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2149 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2150 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2151 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2152 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2153 %soffset = add i32 %soffset.base, 4068 2154 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2155 ret <8 x float> %val 2156} 2157 2158define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { 2159 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 2160 ; CHECK: bb.1 (%ir-block.0): 2161 ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2162 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2163 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2164 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2165 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2166 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2167 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2168 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 2169 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 2170 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 2171 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2172 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2173 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2174 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2175 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2176 ; CHECK: bb.2: 2177 ; CHECK: successors: %bb.3, %bb.2 2178 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 2179 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2180 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2181 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2182 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2183 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2184 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2185 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2186 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2187 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2188 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2189 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2190 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2191 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2192 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2193 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2194 ; CHECK: bb.3: 2195 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2196 ; CHECK: bb.4: 2197 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2198 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2199 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 2200 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 2201 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 2202 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 2203 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 2204 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 2205 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 2206 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 2207 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2208 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 2209 ; GREEDY: bb.1 (%ir-block.0): 2210 ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 2211 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2212 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2213 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2214 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2215 ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2216 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2217 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 2218 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] 2219 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 2220 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2221 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2222 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2223 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2224 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2225 ; GREEDY: bb.2: 2226 ; GREEDY: successors: %bb.3, %bb.2 2227 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 2228 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2229 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2230 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2231 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2232 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2233 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2234 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2235 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2236 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2237 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2238 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2239 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2240 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2241 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2242 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2243 ; GREEDY: bb.3: 2244 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2245 ; GREEDY: bb.4: 2246 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2247 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2248 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2249 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2250 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2251 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2252 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2253 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2254 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2255 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2256 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2257 %soffset = add i32 %soffset.base, 4096 2258 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2259 ret <8 x float> %val 2260} 2261 2262define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) { 2263 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 2264 ; CHECK: bb.1 (%ir-block.0): 2265 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2266 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2267 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2268 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2269 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2270 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 2271 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2272 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 2273 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2274 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 2275 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2276 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2277 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2278 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2279 ; CHECK: bb.2: 2280 ; CHECK: successors: %bb.3, %bb.2 2281 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2282 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2283 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2284 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2285 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2286 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2287 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2288 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2289 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2290 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2291 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2292 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2293 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2294 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2295 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2296 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2297 ; CHECK: bb.3: 2298 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2299 ; CHECK: bb.4: 2300 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2301 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2302 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 2303 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 2304 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 2305 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 2306 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 2307 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 2308 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 2309 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 2310 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2311 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 2312 ; GREEDY: bb.1 (%ir-block.0): 2313 ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2314 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2315 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2316 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2317 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2318 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 2319 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2320 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 2321 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2322 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 2323 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2324 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2325 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2326 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2327 ; GREEDY: bb.2: 2328 ; GREEDY: successors: %bb.3, %bb.2 2329 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2330 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2331 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2332 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2333 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2334 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2335 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2336 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2337 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2338 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2339 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2340 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2341 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2342 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2343 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2344 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2345 ; GREEDY: bb.3: 2346 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2347 ; GREEDY: bb.4: 2348 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2349 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2350 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2351 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2352 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2353 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2354 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2355 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2356 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2357 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2358 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2359 %soffset = add i32 %offset.base, 5000 2360 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2361 ret <8 x float> %val 2362} 2363 2364define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { 2365 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 2366 ; CHECK: bb.1 (%ir-block.0): 2367 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2368 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2369 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2370 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2371 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2372 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 2373 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2374 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 2375 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2376 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 2377 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2378 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2379 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2380 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2381 ; CHECK: bb.2: 2382 ; CHECK: successors: %bb.3, %bb.2 2383 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2384 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2385 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2386 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2387 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2388 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2389 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2390 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2391 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2392 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2393 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2394 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2395 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2396 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2397 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2398 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2399 ; CHECK: bb.3: 2400 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2401 ; CHECK: bb.4: 2402 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2403 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2404 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 2405 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 2406 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 2407 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 2408 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 2409 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 2410 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 2411 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 2412 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2413 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 2414 ; GREEDY: bb.1 (%ir-block.0): 2415 ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2416 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2417 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2418 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2419 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2420 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 2421 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2422 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 2423 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2424 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 2425 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2426 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2427 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2428 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2429 ; GREEDY: bb.2: 2430 ; GREEDY: successors: %bb.3, %bb.2 2431 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2432 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2433 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2434 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2435 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2436 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2437 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2438 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2439 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2440 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2441 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2442 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2443 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2444 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2445 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2446 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2447 ; GREEDY: bb.3: 2448 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2449 ; GREEDY: bb.4: 2450 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2451 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2452 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2453 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2454 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2455 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2456 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2457 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2458 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2459 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2460 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2461 %soffset = add i32 %offset.base, 4076 2462 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2463 ret <8 x float> %val 2464} 2465 2466define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { 2467 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 2468 ; CHECK: bb.1 (%ir-block.0): 2469 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2470 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2471 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2472 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2473 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2474 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 2475 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2476 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 2477 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2478 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 2479 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2480 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2481 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2482 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2483 ; CHECK: bb.2: 2484 ; CHECK: successors: %bb.3, %bb.2 2485 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2486 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2487 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2488 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2489 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2490 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2491 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2492 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2493 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2494 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2495 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2496 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2497 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2498 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2499 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2500 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2501 ; CHECK: bb.3: 2502 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2503 ; CHECK: bb.4: 2504 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2505 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2506 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 2507 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 2508 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 2509 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 2510 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 2511 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 2512 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 2513 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 2514 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2515 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 2516 ; GREEDY: bb.1 (%ir-block.0): 2517 ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2518 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2519 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2520 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2521 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2522 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 2523 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2524 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 2525 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2526 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] 2527 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2528 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2529 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2530 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2531 ; GREEDY: bb.2: 2532 ; GREEDY: successors: %bb.3, %bb.2 2533 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2534 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2535 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2536 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2537 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2538 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2539 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2540 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2541 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2542 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2543 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2544 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) 2545 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) 2546 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2547 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2548 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2549 ; GREEDY: bb.3: 2550 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2551 ; GREEDY: bb.4: 2552 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2553 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2554 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2555 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2556 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2557 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2558 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2559 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2560 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2561 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2562 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2563 %soffset = add i32 %offset.base, 4080 2564 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) 2565 ret <8 x float> %val 2566} 2567 2568define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { 2569 ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 2570 ; CHECK: bb.1 (%ir-block.0): 2571 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2572 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2573 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2574 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2575 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2576 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2577 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 2578 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2579 ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2580 ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2581 ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2582 ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2583 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2584 ; CHECK: bb.2: 2585 ; CHECK: successors: %bb.3, %bb.2 2586 ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2587 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2588 ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2589 ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2590 ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2591 ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2592 ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2593 ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2594 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2595 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2596 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2597 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load 16 + 4064, align 4) 2598 ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load 16 + 4064, align 4) 2599 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2600 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2601 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2602 ; CHECK: bb.3: 2603 ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2604 ; CHECK: bb.4: 2605 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2606 ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2607 ; CHECK: $vgpr0 = COPY [[UV2]](s32) 2608 ; CHECK: $vgpr1 = COPY [[UV3]](s32) 2609 ; CHECK: $vgpr2 = COPY [[UV4]](s32) 2610 ; CHECK: $vgpr3 = COPY [[UV5]](s32) 2611 ; CHECK: $vgpr4 = COPY [[UV6]](s32) 2612 ; CHECK: $vgpr5 = COPY [[UV7]](s32) 2613 ; CHECK: $vgpr6 = COPY [[UV8]](s32) 2614 ; CHECK: $vgpr7 = COPY [[UV9]](s32) 2615 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2616 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 2617 ; GREEDY: bb.1 (%ir-block.0): 2618 ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 2619 ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2620 ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 2621 ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 2622 ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 2623 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2624 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 2625 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2626 ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2627 ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2628 ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF 2629 ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) 2630 ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 2631 ; GREEDY: bb.2: 2632 ; GREEDY: successors: %bb.3, %bb.2 2633 ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 2634 ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec 2635 ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec 2636 ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) 2637 ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec 2638 ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec 2639 ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec 2640 ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2641 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec 2642 ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 2643 ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) 2644 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load 16 + 4064, align 4) 2645 ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load 16 + 4064, align 4) 2646 ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 2647 ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2648 ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec 2649 ; GREEDY: bb.3: 2650 ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 2651 ; GREEDY: bb.4: 2652 ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) 2653 ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) 2654 ; GREEDY: $vgpr0 = COPY [[UV2]](s32) 2655 ; GREEDY: $vgpr1 = COPY [[UV3]](s32) 2656 ; GREEDY: $vgpr2 = COPY [[UV4]](s32) 2657 ; GREEDY: $vgpr3 = COPY [[UV5]](s32) 2658 ; GREEDY: $vgpr4 = COPY [[UV6]](s32) 2659 ; GREEDY: $vgpr5 = COPY [[UV7]](s32) 2660 ; GREEDY: $vgpr6 = COPY [[UV8]](s32) 2661 ; GREEDY: $vgpr7 = COPY [[UV9]](s32) 2662 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 2663 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) 2664 ret <8 x float> %val 2665} 2666 2667define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 2668 ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 2669 ; CHECK: bb.1 (%ir-block.0): 2670 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2671 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2672 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2673 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2674 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2675 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2676 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2677 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2678 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2679 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] 2680 ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2681 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) 2682 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2683 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 2684 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr 2685 ; GREEDY: bb.1 (%ir-block.0): 2686 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2687 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2688 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2689 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2690 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2691 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2692 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2693 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2694 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2695 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] 2696 ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2697 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) 2698 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2699 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 2700 %offset = add i32 %offset.v, %offset.s 2701 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 2702 ret float %val 2703} 2704 2705define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 2706 ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 2707 ; CHECK: bb.1 (%ir-block.0): 2708 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2709 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2710 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2711 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2712 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2713 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2714 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2715 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2716 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2717 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] 2718 ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2719 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) 2720 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2721 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 2722 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr 2723 ; GREEDY: bb.1 (%ir-block.0): 2724 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2725 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2726 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2727 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2728 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2729 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2730 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2731 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2732 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2733 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] 2734 ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2735 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) 2736 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2737 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 2738 %offset = add i32 %offset.s, %offset.v 2739 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 2740 ret float %val 2741} 2742 2743define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 2744 ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 2745 ; CHECK: bb.1 (%ir-block.0): 2746 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2747 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2748 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2749 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2750 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2751 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2752 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2753 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2754 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2755 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] 2756 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2757 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2758 ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] 2759 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2760 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2761 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) 2762 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2763 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 2764 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm 2765 ; GREEDY: bb.1 (%ir-block.0): 2766 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2767 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2768 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2769 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2770 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2771 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2772 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2773 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2774 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2775 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] 2776 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2777 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2778 ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] 2779 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2780 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2781 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) 2782 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2783 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 2784 %offset.base = add i32 %offset.v, %offset.s 2785 %offset = add i32 %offset.base, 1024 2786 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 2787 ret float %val 2788} 2789 2790define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 2791 ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 2792 ; CHECK: bb.1 (%ir-block.0): 2793 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2794 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2795 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2796 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2797 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2798 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2799 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2800 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2801 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2802 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] 2803 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2804 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2805 ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] 2806 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2807 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2808 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) 2809 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2810 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 2811 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm 2812 ; GREEDY: bb.1 (%ir-block.0): 2813 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2814 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2815 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2816 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2817 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2818 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2819 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2820 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2821 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2822 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] 2823 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2824 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2825 ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] 2826 ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 2827 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2828 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) 2829 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2830 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 2831 %offset.base = add i32 %offset.s, %offset.v 2832 %offset = add i32 %offset.base, 1024 2833 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 2834 ret float %val 2835} 2836 2837; TODO: Ideally this would be reassociated to fold. 2838define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 2839 ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 2840 ; CHECK: bb.1 (%ir-block.0): 2841 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2842 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2843 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2844 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2845 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2846 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2847 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2848 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2849 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2850 ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] 2851 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 2852 ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] 2853 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2854 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load 4) 2855 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2856 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 2857 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr 2858 ; GREEDY: bb.1 (%ir-block.0): 2859 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2860 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2861 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2862 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2863 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2864 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2865 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2866 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2867 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2868 ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] 2869 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) 2870 ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] 2871 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2872 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load 4) 2873 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2874 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 2875 %offset.base = add i32 %offset.s, 1024 2876 %offset = add i32 %offset.base, %offset.v 2877 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 2878 ret float %val 2879} 2880 2881define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { 2882 ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 2883 ; CHECK: bb.1 (%ir-block.0): 2884 ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2885 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2886 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2887 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2888 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2889 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2890 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2891 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2892 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2893 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2894 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] 2895 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2896 ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] 2897 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2898 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) 2899 ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2900 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 2901 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr 2902 ; GREEDY: bb.1 (%ir-block.0): 2903 ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 2904 ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 2905 ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 2906 ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 2907 ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 2908 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 2909 ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 2910 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) 2911 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 2912 ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 2913 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] 2914 ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) 2915 ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] 2916 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 2917 ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) 2918 ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) 2919 ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 2920 %offset.base = add i32 %offset.v, 1024 2921 %offset = add i32 %offset.base, %offset.s 2922 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) 2923 ret float %val 2924} 2925 2926declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) 2927declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) 2928declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg) 2929declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) 2930declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg) 2931declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg) 2932 2933declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) 2934declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg) 2935declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg) 2936declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg) 2937declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg) 2938declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg) 2939 2940declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg) 2941declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg) 2942declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg) 2943 2944declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg) 2945declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) 2946 2947declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) 2948declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) 2949 2950declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) 2951declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) 2952