1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s 4 5define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 6 ; GFX9-LABEL: name: load_1d 7 ; GFX9: bb.1.main_body: 8 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 9 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 10 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 11 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 12 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 13 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 14 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 15 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 16 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 17 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 18 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 19 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 20 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 21 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 22 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 23 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 24 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 25 ; GFX9: $vgpr0 = COPY [[UV]](s32) 26 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 27 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 28 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 29 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 30 ; GFX10NSA-LABEL: name: load_1d 31 ; GFX10NSA: bb.1.main_body: 32 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 33 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 34 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 35 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 36 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 37 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 38 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 39 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 40 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 41 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 42 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 43 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 44 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 45 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 46 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 47 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 48 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 49 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 50 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 51 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 52 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 53 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 54main_body: 55 %s = extractelement <2 x i16> %coords, i32 0 56 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 57 ret <4 x float> %v 58} 59 60define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 61 ; GFX9-LABEL: name: load_2d 62 ; GFX9: bb.1.main_body: 63 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 64 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 65 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 66 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 67 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 68 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 69 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 70 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 71 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 72 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 73 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 74 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 75 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 76 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 77 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 78 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 79 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 80 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 81 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 82 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 83 ; GFX9: $vgpr0 = COPY [[UV]](s32) 84 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 85 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 86 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 87 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 88 ; GFX10NSA-LABEL: name: load_2d 89 ; GFX10NSA: bb.1.main_body: 90 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 91 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 92 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 93 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 94 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 95 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 96 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 97 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 98 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 99 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 100 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 101 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 102 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 103 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 104 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 105 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 106 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 107 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 108 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 109 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 110 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 111 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 112 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 113 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 114 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 115main_body: 116 %s = extractelement <2 x i16> %coords, i32 0 117 %t = extractelement <2 x i16> %coords, i32 1 118 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) 119 ret <4 x float> %v 120} 121 122define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 123 ; GFX9-LABEL: name: load_3d 124 ; GFX9: bb.1.main_body: 125 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 126 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 127 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 128 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 129 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 130 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 131 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 132 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 133 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 134 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 135 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 136 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 137 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 138 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 139 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 140 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 141 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 142 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 143 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 144 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 145 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 146 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 147 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 148 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 149 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 150 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 151 ; GFX9: $vgpr0 = COPY [[UV]](s32) 152 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 153 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 154 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 155 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 156 ; GFX10NSA-LABEL: name: load_3d 157 ; GFX10NSA: bb.1.main_body: 158 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 159 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 160 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 161 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 162 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 163 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 164 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 165 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 166 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 167 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 168 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 169 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 170 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 171 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 172 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 173 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 174 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 175 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 176 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 177 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 178 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 179 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 180 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 181 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 182 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 183 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 184 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 185 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 186 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 187 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 188 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 189main_body: 190 %s = extractelement <2 x i16> %coords_lo, i32 0 191 %t = extractelement <2 x i16> %coords_lo, i32 1 192 %r = extractelement <2 x i16> %coords_hi, i32 0 193 %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) 194 ret <4 x float> %v 195} 196 197define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 198 ; GFX9-LABEL: name: load_cube 199 ; GFX9: bb.1.main_body: 200 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 201 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 202 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 203 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 204 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 205 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 206 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 207 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 208 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 209 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 210 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 211 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 212 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 213 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 214 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 215 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 216 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 217 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 218 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 219 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 220 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 221 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 222 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 223 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 224 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 225 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 226 ; GFX9: $vgpr0 = COPY [[UV]](s32) 227 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 228 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 229 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 230 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 231 ; GFX10NSA-LABEL: name: load_cube 232 ; GFX10NSA: bb.1.main_body: 233 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 234 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 235 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 236 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 237 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 238 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 239 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 240 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 241 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 242 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 243 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 244 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 245 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 246 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 247 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 248 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 249 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 250 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 251 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 252 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 253 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 254 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 255 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 256 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 257 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 258 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 259 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 260 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 261 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 262 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 263 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 264main_body: 265 %s = extractelement <2 x i16> %coords_lo, i32 0 266 %t = extractelement <2 x i16> %coords_lo, i32 1 267 %slice = extractelement <2 x i16> %coords_hi, i32 0 268 %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 269 ret <4 x float> %v 270} 271 272define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 273 ; GFX9-LABEL: name: load_1darray 274 ; GFX9: bb.1.main_body: 275 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 276 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 277 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 278 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 279 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 280 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 281 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 282 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 283 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 284 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 285 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 286 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 287 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 288 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 289 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 290 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 291 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 292 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 293 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 294 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 295 ; GFX9: $vgpr0 = COPY [[UV]](s32) 296 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 297 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 298 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 299 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 300 ; GFX10NSA-LABEL: name: load_1darray 301 ; GFX10NSA: bb.1.main_body: 302 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 303 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 304 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 305 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 306 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 307 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 308 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 309 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 310 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 311 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 312 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 313 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 314 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 315 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 316 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 317 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 318 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 319 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 320 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 321 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 322 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 323 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 324 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 325 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 326 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 327main_body: 328 %s = extractelement <2 x i16> %coords, i32 0 329 %slice = extractelement <2 x i16> %coords, i32 1 330 %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 331 ret <4 x float> %v 332} 333 334define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 335 ; GFX9-LABEL: name: load_2darray 336 ; GFX9: bb.1.main_body: 337 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 338 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 339 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 340 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 341 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 342 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 343 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 344 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 345 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 346 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 347 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 348 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 349 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 350 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 351 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 352 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 353 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 354 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 355 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 356 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 357 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 358 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 359 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 360 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 361 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 362 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 363 ; GFX9: $vgpr0 = COPY [[UV]](s32) 364 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 365 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 366 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 367 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 368 ; GFX10NSA-LABEL: name: load_2darray 369 ; GFX10NSA: bb.1.main_body: 370 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 371 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 372 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 373 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 374 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 375 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 376 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 377 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 378 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 379 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 380 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 381 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 382 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 383 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 384 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 385 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 386 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 387 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 388 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 389 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 390 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 391 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 392 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 393 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 394 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 395 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 396 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 397 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 398 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 399 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 400 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 401main_body: 402 %s = extractelement <2 x i16> %coords_lo, i32 0 403 %t = extractelement <2 x i16> %coords_lo, i32 1 404 %slice = extractelement <2 x i16> %coords_hi, i32 0 405 %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 406 ret <4 x float> %v 407} 408 409define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 410 ; GFX9-LABEL: name: load_2dmsaa 411 ; GFX9: bb.1.main_body: 412 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 413 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 414 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 415 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 416 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 417 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 418 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 419 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 420 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 421 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 422 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 423 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 424 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 425 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 426 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 427 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 428 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 429 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 430 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 431 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 432 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 433 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 434 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 435 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 436 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 437 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 438 ; GFX9: $vgpr0 = COPY [[UV]](s32) 439 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 440 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 441 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 442 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 443 ; GFX10NSA-LABEL: name: load_2dmsaa 444 ; GFX10NSA: bb.1.main_body: 445 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 446 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 447 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 448 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 449 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 450 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 451 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 452 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 453 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 454 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 455 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 456 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 457 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 458 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 459 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 460 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 461 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 462 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 463 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 464 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 465 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 466 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 467 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 468 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 469 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 470 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 471 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 472 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 473 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 474 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 475 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 476main_body: 477 %s = extractelement <2 x i16> %coords_lo, i32 0 478 %t = extractelement <2 x i16> %coords_lo, i32 1 479 %fragid = extractelement <2 x i16> %coords_hi, i32 0 480 %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 481 ret <4 x float> %v 482} 483 484define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 485 ; GFX9-LABEL: name: load_2darraymsaa 486 ; GFX9: bb.1.main_body: 487 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 488 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 489 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 490 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 491 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 492 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 493 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 494 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 495 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 496 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 497 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 498 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 499 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 500 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 501 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 502 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 503 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 504 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 505 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 506 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 507 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 508 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 509 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 510 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 511 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 512 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 513 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 514 ; GFX9: $vgpr0 = COPY [[UV]](s32) 515 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 516 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 517 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 518 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 519 ; GFX10NSA-LABEL: name: load_2darraymsaa 520 ; GFX10NSA: bb.1.main_body: 521 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 522 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 523 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 524 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 525 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 526 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 527 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 528 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 529 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 530 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 531 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 532 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 533 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 534 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 535 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 536 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 537 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 538 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 539 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 540 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 541 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 542 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 543 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 544 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 545 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 546 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 547 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 548 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 549 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 550 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 551 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 552 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 553main_body: 554 %s = extractelement <2 x i16> %coords_lo, i32 0 555 %t = extractelement <2 x i16> %coords_lo, i32 1 556 %slice = extractelement <2 x i16> %coords_hi, i32 0 557 %fragid = extractelement <2 x i16> %coords_hi, i32 1 558 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 559 ret <4 x float> %v 560} 561 562define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 563 ; GFX9-LABEL: name: load_mip_1d 564 ; GFX9: bb.1.main_body: 565 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 566 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 567 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 568 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 569 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 570 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 571 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 572 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 573 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 574 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 575 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 576 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 577 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 578 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 579 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 580 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 581 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 582 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 583 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 584 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 585 ; GFX9: $vgpr0 = COPY [[UV]](s32) 586 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 587 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 588 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 589 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 590 ; GFX10NSA-LABEL: name: load_mip_1d 591 ; GFX10NSA: bb.1.main_body: 592 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 593 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 594 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 595 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 596 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 597 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 598 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 599 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 600 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 601 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 602 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 603 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 604 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 605 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 606 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 607 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 608 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 609 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 610 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 611 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 612 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 613 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 614 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 615 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 616 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 617main_body: 618 %s = extractelement <2 x i16> %coords, i32 0 619 %mip = extractelement <2 x i16> %coords, i32 1 620 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 621 ret <4 x float> %v 622} 623 624define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 625 ; GFX9-LABEL: name: load_mip_2d 626 ; GFX9: bb.1.main_body: 627 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 628 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 629 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 630 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 631 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 632 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 633 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 634 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 635 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 636 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 637 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 638 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 639 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 640 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 641 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 642 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 643 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 644 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 645 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 646 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 647 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 648 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 649 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 650 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 651 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 652 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 653 ; GFX9: $vgpr0 = COPY [[UV]](s32) 654 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 655 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 656 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 657 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 658 ; GFX10NSA-LABEL: name: load_mip_2d 659 ; GFX10NSA: bb.1.main_body: 660 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 661 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 662 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 663 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 664 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 665 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 666 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 667 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 668 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 669 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 670 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 671 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 672 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 673 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 674 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 675 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 676 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 677 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 678 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 679 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 680 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 681 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 682 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 683 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 684 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 685 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 686 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 687 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 688 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 689 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 690 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 691main_body: 692 %s = extractelement <2 x i16> %coords_lo, i32 0 693 %t = extractelement <2 x i16> %coords_lo, i32 1 694 %mip = extractelement <2 x i16> %coords_hi, i32 0 695 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 696 ret <4 x float> %v 697} 698 699define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 700 ; GFX9-LABEL: name: load_mip_3d 701 ; GFX9: bb.1.main_body: 702 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 703 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 704 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 705 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 706 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 707 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 708 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 709 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 710 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 711 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 712 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 713 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 714 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 715 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 716 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 717 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 718 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 719 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 720 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 721 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 722 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 723 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 724 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 725 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 726 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 727 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 728 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 729 ; GFX9: $vgpr0 = COPY [[UV]](s32) 730 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 731 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 732 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 733 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 734 ; GFX10NSA-LABEL: name: load_mip_3d 735 ; GFX10NSA: bb.1.main_body: 736 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 737 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 738 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 739 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 740 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 741 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 742 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 743 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 744 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 745 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 746 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 747 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 748 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 749 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 750 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 751 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 752 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 753 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 754 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 755 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 756 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 757 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 758 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 759 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 760 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 761 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 762 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 763 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 764 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 765 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 766 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 767 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 768main_body: 769 %s = extractelement <2 x i16> %coords_lo, i32 0 770 %t = extractelement <2 x i16> %coords_lo, i32 1 771 %r = extractelement <2 x i16> %coords_hi, i32 0 772 %mip = extractelement <2 x i16> %coords_hi, i32 1 773 %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 774 ret <4 x float> %v 775} 776 777define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 778 ; GFX9-LABEL: name: load_mip_cube 779 ; GFX9: bb.1.main_body: 780 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 781 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 782 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 783 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 784 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 785 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 786 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 787 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 788 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 789 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 790 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 791 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 792 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 793 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 794 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 795 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 796 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 797 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 798 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 799 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 800 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 801 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 802 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 803 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 804 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 805 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 806 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 807 ; GFX9: $vgpr0 = COPY [[UV]](s32) 808 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 809 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 810 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 811 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 812 ; GFX10NSA-LABEL: name: load_mip_cube 813 ; GFX10NSA: bb.1.main_body: 814 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 815 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 816 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 817 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 818 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 819 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 820 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 821 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 822 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 823 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 824 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 825 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 826 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 827 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 828 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 829 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 830 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 831 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 832 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 833 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 834 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 835 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 836 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 837 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 838 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 839 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 840 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 841 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 842 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 843 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 844 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 845 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 846main_body: 847 %s = extractelement <2 x i16> %coords_lo, i32 0 848 %t = extractelement <2 x i16> %coords_lo, i32 1 849 %slice = extractelement <2 x i16> %coords_hi, i32 0 850 %mip = extractelement <2 x i16> %coords_hi, i32 1 851 %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 852 ret <4 x float> %v 853} 854 855define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 856 ; GFX9-LABEL: name: load_mip_1darray 857 ; GFX9: bb.1.main_body: 858 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 859 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 860 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 861 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 862 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 863 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 864 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 865 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 866 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 867 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 868 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 869 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 870 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 871 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 872 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 873 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 874 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 875 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 876 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 877 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 878 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 879 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 880 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 881 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 882 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 883 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 884 ; GFX9: $vgpr0 = COPY [[UV]](s32) 885 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 886 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 887 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 888 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 889 ; GFX10NSA-LABEL: name: load_mip_1darray 890 ; GFX10NSA: bb.1.main_body: 891 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 892 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 893 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 894 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 895 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 896 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 897 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 898 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 899 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 900 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 901 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 902 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 903 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 904 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 905 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 906 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 907 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 908 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 909 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 910 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 911 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 912 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 913 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 914 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 915 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 916 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 917 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 918 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 919 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 920 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 921 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 922main_body: 923 %s = extractelement <2 x i16> %coords_lo, i32 0 924 %slice = extractelement <2 x i16> %coords_lo, i32 1 925 %mip = extractelement <2 x i16> %coords_hi, i32 0 926 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 927 ret <4 x float> %v 928} 929 930define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 931 ; GFX9-LABEL: name: load_mip_2darray 932 ; GFX9: bb.1.main_body: 933 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 934 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 935 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 936 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 937 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 938 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 939 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 940 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 941 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 942 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 943 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 944 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 945 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 946 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 947 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 948 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 949 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 950 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 951 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 952 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 953 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 954 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 955 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 956 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 957 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 958 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 959 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 960 ; GFX9: $vgpr0 = COPY [[UV]](s32) 961 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 962 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 963 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 964 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 965 ; GFX10NSA-LABEL: name: load_mip_2darray 966 ; GFX10NSA: bb.1.main_body: 967 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 968 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 969 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 970 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 971 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 972 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 973 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 974 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 975 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 976 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 977 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 978 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 979 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 980 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 981 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 982 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 983 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 984 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 985 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 986 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 987 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 988 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 989 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 990 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 991 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 992 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 993 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 994 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 995 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 996 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 997 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 998 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 999main_body: 1000 %s = extractelement <2 x i16> %coords_lo, i32 0 1001 %t = extractelement <2 x i16> %coords_lo, i32 1 1002 %slice = extractelement <2 x i16> %coords_hi, i32 0 1003 %mip = extractelement <2 x i16> %coords_hi, i32 1 1004 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1005 ret <4 x float> %v 1006} 1007 1008define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 1009 ; GFX9-LABEL: name: store_1d 1010 ; GFX9: bb.1.main_body: 1011 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1012 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1013 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1014 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1015 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1016 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1017 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1018 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1019 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1020 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1021 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1022 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1023 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1024 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1025 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1026 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1027 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1028 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1029 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1030 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 1031 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1032 ; GFX9: S_ENDPGM 0 1033 ; GFX10NSA-LABEL: name: store_1d 1034 ; GFX10NSA: bb.1.main_body: 1035 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1036 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1037 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1038 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1039 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1040 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1041 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1042 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1043 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1044 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1045 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1046 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1047 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1048 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1049 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1050 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1051 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1052 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1053 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1054 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 1055 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1056 ; GFX10NSA: S_ENDPGM 0 1057main_body: 1058 %s = extractelement <2 x i16> %coords, i32 0 1059 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1060 ret void 1061} 1062 1063define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 1064 ; GFX9-LABEL: name: store_2d 1065 ; GFX9: bb.1.main_body: 1066 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1067 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1068 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1069 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1070 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1071 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1072 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1073 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1074 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1075 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1076 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1077 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1078 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1079 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1080 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1081 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1082 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1083 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1084 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1085 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1086 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1087 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1088 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1089 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1090 ; GFX9: S_ENDPGM 0 1091 ; GFX10NSA-LABEL: name: store_2d 1092 ; GFX10NSA: bb.1.main_body: 1093 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1094 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1095 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1096 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1097 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1098 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1099 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1100 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1101 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1102 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1103 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1104 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1105 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1106 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1107 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1108 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1109 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1110 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1111 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1112 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1113 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1114 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1115 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1116 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1117 ; GFX10NSA: S_ENDPGM 0 1118main_body: 1119 %s = extractelement <2 x i16> %coords, i32 0 1120 %t = extractelement <2 x i16> %coords, i32 1 1121 call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) 1122 ret void 1123} 1124 1125define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1126 ; GFX9-LABEL: name: store_3d 1127 ; GFX9: bb.1.main_body: 1128 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1129 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1130 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1131 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1132 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1133 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1134 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1135 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1136 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1137 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1138 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1139 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1140 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1141 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1142 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1143 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1144 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1145 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1146 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1147 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1148 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1149 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1150 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1151 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1152 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1153 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1154 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1155 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1156 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1157 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1158 ; GFX9: S_ENDPGM 0 1159 ; GFX10NSA-LABEL: name: store_3d 1160 ; GFX10NSA: bb.1.main_body: 1161 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1162 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1163 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1164 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1165 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1166 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1167 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1168 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1169 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1170 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1171 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1172 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1173 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1174 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1175 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1176 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1177 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1178 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1179 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1180 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1181 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1182 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1183 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1184 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1185 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1186 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1187 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1188 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1189 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1190 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1191 ; GFX10NSA: S_ENDPGM 0 1192main_body: 1193 %s = extractelement <2 x i16> %coords_lo, i32 0 1194 %t = extractelement <2 x i16> %coords_lo, i32 1 1195 %r = extractelement <2 x i16> %coords_hi, i32 0 1196 call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) 1197 ret void 1198} 1199 1200define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1201 ; GFX9-LABEL: name: store_cube 1202 ; GFX9: bb.1.main_body: 1203 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1204 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1205 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1206 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1207 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1208 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1209 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1210 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1211 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1212 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1213 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1214 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1215 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1216 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1217 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1218 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1219 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1220 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1221 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1222 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1223 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1224 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1225 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1226 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1227 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1228 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1229 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1230 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1231 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1232 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1233 ; GFX9: S_ENDPGM 0 1234 ; GFX10NSA-LABEL: name: store_cube 1235 ; GFX10NSA: bb.1.main_body: 1236 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1237 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1238 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1239 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1240 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1241 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1242 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1243 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1244 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1245 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1246 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1247 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1248 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1249 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1250 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1251 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1252 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1253 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1254 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1255 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1256 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1257 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1258 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1259 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1260 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1261 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1262 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1263 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1264 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1265 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1266 ; GFX10NSA: S_ENDPGM 0 1267main_body: 1268 %s = extractelement <2 x i16> %coords_lo, i32 0 1269 %t = extractelement <2 x i16> %coords_lo, i32 1 1270 %slice = extractelement <2 x i16> %coords_hi, i32 0 1271 call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1272 ret void 1273} 1274 1275define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 1276 ; GFX9-LABEL: name: store_1darray 1277 ; GFX9: bb.1.main_body: 1278 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1279 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1280 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1281 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1282 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1283 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1284 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1285 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1286 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1287 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1288 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1289 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1290 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1291 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1292 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1293 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1294 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1295 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1296 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1297 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1298 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1299 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1300 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1301 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1302 ; GFX9: S_ENDPGM 0 1303 ; GFX10NSA-LABEL: name: store_1darray 1304 ; GFX10NSA: bb.1.main_body: 1305 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1306 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1307 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1308 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1309 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1310 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1311 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1312 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1313 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1314 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1315 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1316 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1317 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1318 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1319 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1320 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1321 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1322 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1323 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1324 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1325 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1326 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1327 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1328 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1329 ; GFX10NSA: S_ENDPGM 0 1330main_body: 1331 %s = extractelement <2 x i16> %coords, i32 0 1332 %slice = extractelement <2 x i16> %coords, i32 1 1333 call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1334 ret void 1335} 1336 1337define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1338 ; GFX9-LABEL: name: store_2darray 1339 ; GFX9: bb.1.main_body: 1340 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1341 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1342 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1343 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1344 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1345 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1346 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1347 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1348 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1349 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1350 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1351 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1352 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1353 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1354 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1355 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1356 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1357 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1358 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1359 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1360 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1361 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1362 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1363 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1364 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1365 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1366 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1367 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1368 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1369 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1370 ; GFX9: S_ENDPGM 0 1371 ; GFX10NSA-LABEL: name: store_2darray 1372 ; GFX10NSA: bb.1.main_body: 1373 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1374 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1375 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1376 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1377 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1378 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1379 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1380 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1381 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1382 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1383 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1384 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1385 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1386 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1387 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1388 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1389 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1390 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1391 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1392 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1393 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1394 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1395 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1396 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1397 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1398 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1399 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1400 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1401 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1402 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1403 ; GFX10NSA: S_ENDPGM 0 1404main_body: 1405 %s = extractelement <2 x i16> %coords_lo, i32 0 1406 %t = extractelement <2 x i16> %coords_lo, i32 1 1407 %slice = extractelement <2 x i16> %coords_hi, i32 0 1408 call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1409 ret void 1410} 1411 1412define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1413 ; GFX9-LABEL: name: store_2dmsaa 1414 ; GFX9: bb.1.main_body: 1415 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1416 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1417 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1418 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1419 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1420 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1421 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1422 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1423 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1424 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1425 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1426 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1427 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1428 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1429 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1430 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1431 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1432 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1433 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1434 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1435 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1436 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1437 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1438 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1439 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1440 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1441 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1442 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1443 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1444 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1445 ; GFX9: S_ENDPGM 0 1446 ; GFX10NSA-LABEL: name: store_2dmsaa 1447 ; GFX10NSA: bb.1.main_body: 1448 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1449 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1450 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1451 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1452 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1453 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1454 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1455 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1456 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1457 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1458 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1459 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1460 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1461 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1462 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1463 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1464 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1465 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1466 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1467 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1468 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1469 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1470 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1471 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1472 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1473 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1474 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1475 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1476 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1477 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1478 ; GFX10NSA: S_ENDPGM 0 1479main_body: 1480 %s = extractelement <2 x i16> %coords_lo, i32 0 1481 %t = extractelement <2 x i16> %coords_lo, i32 1 1482 %fragid = extractelement <2 x i16> %coords_hi, i32 0 1483 call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1484 ret void 1485} 1486 1487define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1488 ; GFX9-LABEL: name: store_2darraymsaa 1489 ; GFX9: bb.1.main_body: 1490 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1491 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1492 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1493 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1494 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1495 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1496 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1497 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1498 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1499 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1500 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1501 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1502 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1503 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1504 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1505 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1506 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1507 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1508 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1509 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1510 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1511 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1512 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1513 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1514 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1515 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1516 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1517 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1518 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1519 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1520 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1521 ; GFX9: S_ENDPGM 0 1522 ; GFX10NSA-LABEL: name: store_2darraymsaa 1523 ; GFX10NSA: bb.1.main_body: 1524 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1525 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1526 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1527 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1528 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1529 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1530 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1531 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1532 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1533 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1534 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1535 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1536 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1537 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1538 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1539 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1540 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1541 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1542 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1543 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1544 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1545 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1546 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1547 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1548 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1549 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1550 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1551 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1552 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1553 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1554 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1555 ; GFX10NSA: S_ENDPGM 0 1556main_body: 1557 %s = extractelement <2 x i16> %coords_lo, i32 0 1558 %t = extractelement <2 x i16> %coords_lo, i32 1 1559 %slice = extractelement <2 x i16> %coords_hi, i32 0 1560 %fragid = extractelement <2 x i16> %coords_hi, i32 1 1561 call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1562 ret void 1563} 1564 1565define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 1566 ; GFX9-LABEL: name: store_mip_1d 1567 ; GFX9: bb.1.main_body: 1568 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1569 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1570 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1571 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1572 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1573 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1574 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1575 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1576 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1577 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1578 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1579 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1580 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1581 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1582 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1583 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1584 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1585 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1586 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1587 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1588 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1589 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1590 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1591 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1592 ; GFX9: S_ENDPGM 0 1593 ; GFX10NSA-LABEL: name: store_mip_1d 1594 ; GFX10NSA: bb.1.main_body: 1595 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1596 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1597 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1598 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1599 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1600 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1601 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1602 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1603 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1604 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1605 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1606 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1607 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1608 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1609 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1610 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1611 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1612 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1613 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1614 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1615 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1616 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1617 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1618 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1619 ; GFX10NSA: S_ENDPGM 0 1620main_body: 1621 %s = extractelement <2 x i16> %coords, i32 0 1622 %mip = extractelement <2 x i16> %coords, i32 1 1623 call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1624 ret void 1625} 1626 1627define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1628 ; GFX9-LABEL: name: store_mip_2d 1629 ; GFX9: bb.1.main_body: 1630 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1631 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1632 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1633 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1634 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1635 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1636 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1637 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1638 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1639 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1640 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1641 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1642 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1643 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1644 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1645 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1646 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1647 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1648 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1649 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1650 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1651 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1652 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1653 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1654 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1655 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1656 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1657 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1658 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1659 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1660 ; GFX9: S_ENDPGM 0 1661 ; GFX10NSA-LABEL: name: store_mip_2d 1662 ; GFX10NSA: bb.1.main_body: 1663 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1664 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1665 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1666 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1667 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1668 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1669 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1670 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1671 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1672 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1673 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1674 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1675 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1676 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1677 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1678 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1679 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1680 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1681 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1682 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1683 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1684 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1685 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1686 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1687 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1688 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1689 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1690 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1691 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1692 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1693 ; GFX10NSA: S_ENDPGM 0 1694main_body: 1695 %s = extractelement <2 x i16> %coords_lo, i32 0 1696 %t = extractelement <2 x i16> %coords_lo, i32 1 1697 %mip = extractelement <2 x i16> %coords_hi, i32 0 1698 call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1699 ret void 1700} 1701 1702define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1703 ; GFX9-LABEL: name: store_mip_3d 1704 ; GFX9: bb.1.main_body: 1705 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1706 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1707 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1708 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1709 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1710 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1711 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1712 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1713 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1714 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1715 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1716 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1717 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1718 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1719 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1720 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1721 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1722 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1723 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1724 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1725 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1726 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1727 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1728 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1729 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1730 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1731 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1732 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1733 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1734 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1735 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1736 ; GFX9: S_ENDPGM 0 1737 ; GFX10NSA-LABEL: name: store_mip_3d 1738 ; GFX10NSA: bb.1.main_body: 1739 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1740 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1741 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1742 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1743 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1744 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1745 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1746 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1747 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1748 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1749 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1750 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1751 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1752 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1753 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1754 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1755 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1756 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1757 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1758 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1759 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1760 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1761 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1762 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1763 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1764 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1765 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1766 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1767 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1768 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1769 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1770 ; GFX10NSA: S_ENDPGM 0 1771main_body: 1772 %s = extractelement <2 x i16> %coords_lo, i32 0 1773 %t = extractelement <2 x i16> %coords_lo, i32 1 1774 %r = extractelement <2 x i16> %coords_hi, i32 0 1775 %mip = extractelement <2 x i16> %coords_hi, i32 1 1776 call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1777 ret void 1778} 1779 1780define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1781 ; GFX9-LABEL: name: store_mip_cube 1782 ; GFX9: bb.1.main_body: 1783 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1784 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1785 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1786 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1787 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1788 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1789 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1790 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1791 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1792 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1793 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1794 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1795 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1796 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1797 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1798 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1799 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1800 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1801 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1802 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1803 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1804 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1805 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1806 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1807 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1808 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1809 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1810 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1811 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1812 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1813 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1814 ; GFX9: S_ENDPGM 0 1815 ; GFX10NSA-LABEL: name: store_mip_cube 1816 ; GFX10NSA: bb.1.main_body: 1817 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1818 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1819 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1820 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1821 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1822 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1823 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1824 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1825 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1826 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1827 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1828 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1829 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1830 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1831 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1832 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1833 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1834 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1835 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1836 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1837 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1838 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1839 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1840 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1841 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1842 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1843 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1844 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1845 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1846 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1847 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1848 ; GFX10NSA: S_ENDPGM 0 1849main_body: 1850 %s = extractelement <2 x i16> %coords_lo, i32 0 1851 %t = extractelement <2 x i16> %coords_lo, i32 1 1852 %slice = extractelement <2 x i16> %coords_hi, i32 0 1853 %mip = extractelement <2 x i16> %coords_hi, i32 1 1854 call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1855 ret void 1856} 1857 1858define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1859 ; GFX9-LABEL: name: store_mip_1darray 1860 ; GFX9: bb.1.main_body: 1861 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1862 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1863 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1864 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1865 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1866 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1867 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1868 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1869 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1870 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1871 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1872 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1873 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1874 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1875 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1876 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1877 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1878 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1879 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1880 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1881 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1882 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1883 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1884 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1885 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1886 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1887 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1888 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1889 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1890 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1891 ; GFX9: S_ENDPGM 0 1892 ; GFX10NSA-LABEL: name: store_mip_1darray 1893 ; GFX10NSA: bb.1.main_body: 1894 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1895 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1896 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1897 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1898 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1899 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1900 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1901 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1902 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1903 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1904 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1905 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1906 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1907 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1908 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1909 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1910 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1911 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1912 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1913 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1914 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1915 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1916 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1917 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1918 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1919 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1920 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1921 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) 1922 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1923 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1924 ; GFX10NSA: S_ENDPGM 0 1925main_body: 1926 %s = extractelement <2 x i16> %coords_lo, i32 0 1927 %slice = extractelement <2 x i16> %coords_lo, i32 1 1928 %mip = extractelement <2 x i16> %coords_hi, i32 0 1929 call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1930 ret void 1931} 1932 1933define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1934 ; GFX9-LABEL: name: store_mip_2darray 1935 ; GFX9: bb.1.main_body: 1936 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1937 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1938 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1939 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1940 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1941 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1942 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1943 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1944 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1945 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1946 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1947 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1948 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1949 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1950 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1951 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1952 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1953 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1954 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1955 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1956 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1957 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1958 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1959 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1960 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1961 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1962 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1963 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1964 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1965 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1966 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 1967 ; GFX9: S_ENDPGM 0 1968 ; GFX10NSA-LABEL: name: store_mip_2darray 1969 ; GFX10NSA: bb.1.main_body: 1970 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1971 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1972 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1973 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1974 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1975 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1976 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1977 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1978 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1979 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1980 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1981 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1982 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1983 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1984 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1985 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1986 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1987 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1988 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1989 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1990 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1991 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1992 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1993 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1994 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1995 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1996 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1997 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 1998 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 1999 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 2000 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2001 ; GFX10NSA: S_ENDPGM 0 2002main_body: 2003 %s = extractelement <2 x i16> %coords_lo, i32 0 2004 %t = extractelement <2 x i16> %coords_lo, i32 1 2005 %slice = extractelement <2 x i16> %coords_hi, i32 0 2006 %mip = extractelement <2 x i16> %coords_hi, i32 1 2007 call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2008 ret void 2009} 2010 2011define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2012 ; GFX9-LABEL: name: getresinfo_1d 2013 ; GFX9: bb.1.main_body: 2014 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2015 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2016 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2017 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2018 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2019 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2020 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2021 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2022 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2023 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2024 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2025 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2026 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2027 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2028 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2029 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2030 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2031 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2032 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2033 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2034 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2035 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2036 ; GFX10NSA-LABEL: name: getresinfo_1d 2037 ; GFX10NSA: bb.1.main_body: 2038 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2039 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2040 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2041 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2042 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2043 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2044 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2045 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2046 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2047 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2048 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2049 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2050 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2051 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2052 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2053 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2054 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2055 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2056 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2057 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2058 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2059 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2060main_body: 2061 %mip = extractelement <2 x i16> %coords, i32 0 2062 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2063 ret <4 x float> %v 2064} 2065 2066define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2067 ; GFX9-LABEL: name: getresinfo_2d 2068 ; GFX9: bb.1.main_body: 2069 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2070 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2071 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2072 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2073 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2074 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2075 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2076 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2077 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2078 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2079 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2080 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2081 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2082 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2083 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2084 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2085 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2086 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2087 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2088 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2089 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2090 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2091 ; GFX10NSA-LABEL: name: getresinfo_2d 2092 ; GFX10NSA: bb.1.main_body: 2093 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2094 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2095 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2096 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2097 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2098 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2099 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2100 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2101 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2102 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2103 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2104 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2105 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2106 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2107 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2108 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2109 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2110 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2111 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2112 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2113 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2114 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2115main_body: 2116 %mip = extractelement <2 x i16> %coords, i32 0 2117 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2118 ret <4 x float> %v 2119} 2120 2121define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2122 ; GFX9-LABEL: name: getresinfo_3d 2123 ; GFX9: bb.1.main_body: 2124 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2125 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2126 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2127 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2128 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2129 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2130 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2131 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2132 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2133 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2134 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2135 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2136 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2137 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2138 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2139 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2140 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2141 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2142 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2143 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2144 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2145 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2146 ; GFX10NSA-LABEL: name: getresinfo_3d 2147 ; GFX10NSA: bb.1.main_body: 2148 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2149 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2150 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2151 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2152 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2153 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2154 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2155 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2156 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2157 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2158 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2159 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2160 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2161 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2162 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2163 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2164 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2165 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2166 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2167 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2168 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2169 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2170main_body: 2171 %mip = extractelement <2 x i16> %coords, i32 0 2172 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2173 ret <4 x float> %v 2174} 2175 2176define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2177 ; GFX9-LABEL: name: getresinfo_cube 2178 ; GFX9: bb.1.main_body: 2179 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2180 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2181 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2182 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2183 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2184 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2185 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2186 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2187 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2188 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2189 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2190 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2191 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2192 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2193 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2194 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2195 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2196 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2197 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2198 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2199 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2200 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2201 ; GFX10NSA-LABEL: name: getresinfo_cube 2202 ; GFX10NSA: bb.1.main_body: 2203 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2204 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2205 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2206 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2207 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2208 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2209 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2210 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2211 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2212 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2213 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2214 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2215 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2216 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2217 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2218 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2219 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2220 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2221 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2222 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2223 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2224 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2225main_body: 2226 %mip = extractelement <2 x i16> %coords, i32 0 2227 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2228 ret <4 x float> %v 2229} 2230 2231define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2232 ; GFX9-LABEL: name: getresinfo_1darray 2233 ; GFX9: bb.1.main_body: 2234 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2235 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2236 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2237 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2238 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2239 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2240 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2241 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2242 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2243 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2244 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2245 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2246 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2247 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2248 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2249 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2250 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2251 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2252 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2253 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2254 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2255 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2256 ; GFX10NSA-LABEL: name: getresinfo_1darray 2257 ; GFX10NSA: bb.1.main_body: 2258 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2259 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2260 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2261 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2262 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2263 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2264 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2265 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2266 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2267 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2268 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2269 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2270 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2271 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2272 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2273 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2274 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2275 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2276 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2277 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2278 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2279 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2280main_body: 2281 %mip = extractelement <2 x i16> %coords, i32 0 2282 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2283 ret <4 x float> %v 2284} 2285 2286define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2287 ; GFX9-LABEL: name: getresinfo_2darray 2288 ; GFX9: bb.1.main_body: 2289 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2290 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2291 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2292 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2293 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2294 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2295 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2296 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2297 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2298 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2299 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2300 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2301 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2302 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2303 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2304 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2305 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2306 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2307 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2308 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2309 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2310 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2311 ; GFX10NSA-LABEL: name: getresinfo_2darray 2312 ; GFX10NSA: bb.1.main_body: 2313 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2314 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2315 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2316 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2317 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2318 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2319 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2320 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2321 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2322 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2323 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2324 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2325 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2326 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2327 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2328 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2329 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2330 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2331 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2332 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2333 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2334 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2335main_body: 2336 %mip = extractelement <2 x i16> %coords, i32 0 2337 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2338 ret <4 x float> %v 2339} 2340 2341define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2342 ; GFX9-LABEL: name: getresinfo_2dmsaa 2343 ; GFX9: bb.1.main_body: 2344 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2345 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2346 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2347 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2348 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2349 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2350 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2351 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2352 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2353 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2354 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2355 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2356 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2357 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2358 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2359 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2360 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2361 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2362 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2363 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2364 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2365 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2366 ; GFX10NSA-LABEL: name: getresinfo_2dmsaa 2367 ; GFX10NSA: bb.1.main_body: 2368 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2369 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2370 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2371 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2372 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2373 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2374 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2375 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2376 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2377 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2378 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2379 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2380 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2381 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2382 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2383 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2384 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2385 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2386 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2387 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2388 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2389 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2390main_body: 2391 %mip = extractelement <2 x i16> %coords, i32 0 2392 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2393 ret <4 x float> %v 2394} 2395 2396define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2397 ; GFX9-LABEL: name: getresinfo_2darraymsaa 2398 ; GFX9: bb.1.main_body: 2399 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2400 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2401 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2402 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2403 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2404 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2405 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2406 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2407 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2408 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2409 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2410 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2411 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2412 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2413 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2414 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2415 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2416 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2417 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2418 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2419 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2420 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2421 ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa 2422 ; GFX10NSA: bb.1.main_body: 2423 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2424 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2425 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2426 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2427 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2428 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2429 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2430 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2431 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2432 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2433 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2434 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2435 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2436 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2437 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2438 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2439 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2440 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2441 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2442 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2443 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2444 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2445main_body: 2446 %mip = extractelement <2 x i16> %coords, i32 0 2447 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2448 ret <4 x float> %v 2449} 2450 2451define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2452 ; GFX9-LABEL: name: load_1d_V1 2453 ; GFX9: bb.1.main_body: 2454 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2455 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2456 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2457 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2458 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2459 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2460 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2461 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2462 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2463 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2464 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2465 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2466 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2467 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2468 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2469 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") 2470 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 2471 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 2472 ; GFX10NSA-LABEL: name: load_1d_V1 2473 ; GFX10NSA: bb.1.main_body: 2474 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2475 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2476 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2477 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2478 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2479 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2480 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2481 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2482 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2483 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2484 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2485 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2486 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2487 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2488 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2489 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") 2490 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 2491 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 2492main_body: 2493 %s = extractelement <2 x i16> %coords, i32 0 2494 %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2495 ret float %v 2496} 2497 2498define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2499 ; GFX9-LABEL: name: load_1d_V2 2500 ; GFX9: bb.1.main_body: 2501 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2502 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2503 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2504 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2505 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2506 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2507 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2508 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2509 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2510 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2511 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2512 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2513 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2514 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2515 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2516 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") 2517 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 2518 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2519 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2520 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 2521 ; GFX10NSA-LABEL: name: load_1d_V2 2522 ; GFX10NSA: bb.1.main_body: 2523 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2524 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2525 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2526 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2527 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2528 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2529 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2530 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2531 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2532 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2533 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2534 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2535 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2536 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2537 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2538 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") 2539 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 2540 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2541 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2542 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 2543main_body: 2544 %s = extractelement <2 x i16> %coords, i32 0 2545 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2546 ret <2 x float> %v 2547} 2548 2549define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) { 2550 ; GFX9-LABEL: name: store_1d_V1 2551 ; GFX9: bb.1.main_body: 2552 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 2553 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2554 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2555 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2556 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2557 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2558 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2559 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2560 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2561 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2562 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2563 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 2564 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2565 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 2566 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2567 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2568 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") 2569 ; GFX9: S_ENDPGM 0 2570 ; GFX10NSA-LABEL: name: store_1d_V1 2571 ; GFX10NSA: bb.1.main_body: 2572 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 2573 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2574 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2575 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2576 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2577 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2578 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2579 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2580 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2581 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2582 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2583 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 2584 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2585 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 2586 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2587 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2588 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") 2589 ; GFX10NSA: S_ENDPGM 0 2590main_body: 2591 %s = extractelement <2 x i16> %coords, i32 0 2592 call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2593 ret void 2594} 2595 2596define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) { 2597 ; GFX9-LABEL: name: store_1d_V2 2598 ; GFX9: bb.1.main_body: 2599 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 2600 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2601 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2602 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2603 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2604 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2605 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2606 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2607 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2608 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2609 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2610 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2611 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 2612 ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 2613 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2614 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) 2615 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2616 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2617 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") 2618 ; GFX9: S_ENDPGM 0 2619 ; GFX10NSA-LABEL: name: store_1d_V2 2620 ; GFX10NSA: bb.1.main_body: 2621 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 2622 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2623 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2624 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2625 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2626 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2627 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2628 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2629 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2630 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2631 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2632 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2633 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 2634 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 2635 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2636 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) 2637 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2638 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2639 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") 2640 ; GFX10NSA: S_ENDPGM 0 2641main_body: 2642 %s = extractelement <2 x i16> %coords, i32 0 2643 call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2644 ret void 2645} 2646 2647define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2648 ; GFX9-LABEL: name: load_1d_glc 2649 ; GFX9: bb.1.main_body: 2650 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2651 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2652 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2653 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2654 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2655 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2656 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2657 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2658 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2659 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2660 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2661 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2662 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2663 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2664 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2665 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 2666 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2667 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2668 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2669 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2670 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2671 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2672 ; GFX10NSA-LABEL: name: load_1d_glc 2673 ; GFX10NSA: bb.1.main_body: 2674 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2675 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2676 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2677 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2678 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2679 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2680 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2681 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2682 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2683 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2684 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2685 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2686 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2687 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2688 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2689 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 2690 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2691 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2692 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2693 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2694 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2695 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2696main_body: 2697 %s = extractelement <2 x i16> %coords, i32 0 2698 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1) 2699 ret <4 x float> %v 2700} 2701 2702define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2703 ; GFX9-LABEL: name: load_1d_slc 2704 ; GFX9: bb.1.main_body: 2705 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2706 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2707 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2708 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2709 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2710 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2711 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2712 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2713 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2714 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2715 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2716 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2717 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2718 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2719 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2720 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 2721 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2722 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2723 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2724 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2725 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2726 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2727 ; GFX10NSA-LABEL: name: load_1d_slc 2728 ; GFX10NSA: bb.1.main_body: 2729 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2730 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2731 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2732 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2733 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2734 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2735 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2736 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2737 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2738 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2739 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2740 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2741 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2742 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2743 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2744 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 2745 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2746 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2747 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2748 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2749 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2750 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2751main_body: 2752 %s = extractelement <2 x i16> %coords, i32 0 2753 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) 2754 ret <4 x float> %v 2755} 2756 2757define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2758 ; GFX9-LABEL: name: load_1d_glc_slc 2759 ; GFX9: bb.1.main_body: 2760 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2761 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2762 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2763 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2764 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2765 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2766 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2767 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2768 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2769 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2770 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2771 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2772 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2773 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2774 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2775 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 2776 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2777 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2778 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2779 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2780 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2781 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2782 ; GFX10NSA-LABEL: name: load_1d_glc_slc 2783 ; GFX10NSA: bb.1.main_body: 2784 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2785 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2786 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2787 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2788 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2789 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2790 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2791 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2792 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2793 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2794 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2795 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2796 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2797 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2798 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2799 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 2800 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2801 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2802 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2803 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2804 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2805 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2806main_body: 2807 %s = extractelement <2 x i16> %coords, i32 0 2808 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3) 2809 ret <4 x float> %v 2810} 2811 2812define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 2813 ; GFX9-LABEL: name: store_1d_glc 2814 ; GFX9: bb.1.main_body: 2815 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2816 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2817 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2818 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2819 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2820 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2821 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2822 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2823 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2824 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2825 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2826 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2827 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2828 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2829 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2830 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2831 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2832 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2833 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2834 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2835 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2836 ; GFX9: S_ENDPGM 0 2837 ; GFX10NSA-LABEL: name: store_1d_glc 2838 ; GFX10NSA: bb.1.main_body: 2839 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2840 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2841 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2842 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2843 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2844 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2845 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2846 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2847 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2848 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2849 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2850 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2851 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2852 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2853 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2854 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2855 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2856 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2857 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2858 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2859 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2860 ; GFX10NSA: S_ENDPGM 0 2861main_body: 2862 %s = extractelement <2 x i16> %coords, i32 0 2863 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1) 2864 ret void 2865} 2866 2867define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 2868 ; GFX9-LABEL: name: store_1d_slc 2869 ; GFX9: bb.1.main_body: 2870 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2871 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2872 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2873 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2874 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2875 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2876 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2877 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2878 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2879 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2880 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2881 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2882 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2883 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2884 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2885 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2886 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2887 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2888 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2889 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2890 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2891 ; GFX9: S_ENDPGM 0 2892 ; GFX10NSA-LABEL: name: store_1d_slc 2893 ; GFX10NSA: bb.1.main_body: 2894 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2895 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2896 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2897 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2898 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2899 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2900 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2901 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2902 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2903 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2904 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2905 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2906 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2907 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2908 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2909 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2910 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2911 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2912 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2913 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2914 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2915 ; GFX10NSA: S_ENDPGM 0 2916main_body: 2917 %s = extractelement <2 x i16> %coords, i32 0 2918 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) 2919 ret void 2920} 2921 2922define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 2923 ; GFX9-LABEL: name: store_1d_glc_slc 2924 ; GFX9: bb.1.main_body: 2925 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2926 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2927 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2928 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2929 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2930 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2931 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2932 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2933 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2934 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2935 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2936 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2937 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2938 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2939 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2940 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2941 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2942 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2943 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2944 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2945 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2946 ; GFX9: S_ENDPGM 0 2947 ; GFX10NSA-LABEL: name: store_1d_glc_slc 2948 ; GFX10NSA: bb.1.main_body: 2949 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 2950 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2951 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2952 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2953 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2954 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2955 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2956 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2957 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2958 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2959 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2960 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2961 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2962 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2963 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2964 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2965 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2966 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2967 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2968 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2969 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") 2970 ; GFX10NSA: S_ENDPGM 0 2971main_body: 2972 %s = extractelement <2 x i16> %coords, i32 0 2973 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3) 2974 ret void 2975} 2976 2977define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 2978 ; GFX9-LABEL: name: getresinfo_dmask0 2979 ; GFX9: bb.1.main_body: 2980 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2981 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF 2982 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) 2983 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2984 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2985 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2986 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2987 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2988 ; GFX10NSA-LABEL: name: getresinfo_dmask0 2989 ; GFX10NSA: bb.1.main_body: 2990 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2991 ; GFX10NSA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF 2992 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) 2993 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2994 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2995 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2996 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2997 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2998main_body: 2999 %mip = extractelement <2 x i16> %coords, i32 0 3000 %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 3001 ret <4 x float> %r 3002} 3003 3004define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 3005 ; GFX9-LABEL: name: load_1d_tfe 3006 ; GFX9: bb.1.main_body: 3007 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 3008 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3009 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3010 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3011 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3012 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3013 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3014 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3015 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3016 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3017 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3018 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3019 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3020 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3021 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3022 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 3023 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3024 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3025 ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3026 ; GFX9: $vgpr0 = COPY [[UV]](s32) 3027 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 3028 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 3029 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 3030 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3031 ; GFX10NSA-LABEL: name: load_1d_tfe 3032 ; GFX10NSA: bb.1.main_body: 3033 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 3034 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3035 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3036 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3037 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3038 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3039 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3040 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3041 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3042 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3043 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3044 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3045 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3046 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3047 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3048 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 3049 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3050 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3051 ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3052 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 3053 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 3054 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 3055 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 3056 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3057main_body: 3058 %s = extractelement <2 x i16> %coords, i32 0 3059 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0) 3060 %data = extractvalue { <4 x float>, i32 } %v, 0 3061 %tfe = extractvalue { <4 x float>, i32 } %v, 1 3062 store i32 %tfe, i32 addrspace(1)* undef 3063 ret <4 x float> %data 3064} 3065 3066define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 3067 ; GFX9-LABEL: name: load_2d_tfe 3068 ; GFX9: bb.1.main_body: 3069 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 3070 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3071 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3072 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3073 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3074 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3075 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3076 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3077 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3078 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3079 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3080 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3081 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3082 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3083 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3084 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3085 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3086 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3087 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 3088 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3089 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3090 ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3091 ; GFX9: $vgpr0 = COPY [[UV]](s32) 3092 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 3093 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 3094 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 3095 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3096 ; GFX10NSA-LABEL: name: load_2d_tfe 3097 ; GFX10NSA: bb.1.main_body: 3098 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 3099 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3100 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3101 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3102 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3103 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3104 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3105 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3106 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3107 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3108 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3109 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3110 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3111 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3112 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3113 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3114 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3115 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3116 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 3117 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3118 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3119 ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3120 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 3121 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 3122 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 3123 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 3124 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3125main_body: 3126 %s = extractelement <2 x i16> %coords, i32 0 3127 %t = extractelement <2 x i16> %coords, i32 1 3128 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0) 3129 %data = extractvalue { <4 x float>, i32 } %v, 0 3130 %tfe = extractvalue { <4 x float>, i32 } %v, 1 3131 store i32 %tfe, i32 addrspace(1)* undef 3132 ret <4 x float> %data 3133} 3134 3135define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 3136 ; GFX9-LABEL: name: load_3d_tfe 3137 ; GFX9: bb.1.main_body: 3138 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3139 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3140 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3141 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3142 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3143 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3144 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3145 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3146 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3147 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3148 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3149 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3150 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3151 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3152 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3153 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3154 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3155 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3156 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3157 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3158 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3159 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 3160 ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 3161 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF1]](s32) 3162 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3163 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3164 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3165 ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3166 ; GFX9: $vgpr0 = COPY [[UV]](s32) 3167 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 3168 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 3169 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 3170 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3171 ; GFX10NSA-LABEL: name: load_3d_tfe 3172 ; GFX10NSA: bb.1.main_body: 3173 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3174 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3175 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3176 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3177 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3178 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3179 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3180 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3181 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3182 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3183 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3184 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3185 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3186 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3187 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3188 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3189 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3190 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3191 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3192 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3193 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3194 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 3195 ; GFX10NSA: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 3196 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF1]](s32) 3197 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3198 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3199 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3200 ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3201 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 3202 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 3203 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 3204 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 3205 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3206main_body: 3207 %s = extractelement <2 x i16> %coords_lo, i32 0 3208 %t = extractelement <2 x i16> %coords_lo, i32 1 3209 %r = extractelement <2 x i16> %coords_hi, i32 0 3210 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) 3211 %data = extractvalue { <4 x float>, i32 } %v, 0 3212 %tfe = extractvalue { <4 x float>, i32 } %v, 1 3213 store i32 %tfe, i32 addrspace(1)* undef 3214 ret <4 x float> %data 3215} 3216 3217define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 3218 ; GFX9-LABEL: name: load_2darraymsaa_tfe 3219 ; GFX9: bb.1.main_body: 3220 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3221 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3222 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3223 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3224 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3225 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3226 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3227 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3228 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3229 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3230 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3231 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3232 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3233 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3234 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3235 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3236 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3237 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3238 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3239 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3240 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3241 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3242 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 3243 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 3244 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 3245 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3246 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3247 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3248 ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3249 ; GFX9: $vgpr0 = COPY [[UV]](s32) 3250 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 3251 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 3252 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 3253 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3254 ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe 3255 ; GFX10NSA: bb.1.main_body: 3256 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3257 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3258 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3259 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3260 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3261 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3262 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3263 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3264 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3265 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3266 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3267 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3268 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3269 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3270 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3271 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3272 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3273 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3274 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3275 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3276 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3277 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3278 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 3279 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) 3280 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32) 3281 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3282 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") 3283 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3284 ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) 3285 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 3286 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 3287 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 3288 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 3289 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3290main_body: 3291 %s = extractelement <2 x i16> %coords_lo, i32 0 3292 %t = extractelement <2 x i16> %coords_lo, i32 1 3293 %slice = extractelement <2 x i16> %coords_hi, i32 0 3294 %fragid = extractelement <2 x i16> %coords_hi, i32 1 3295 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) 3296 %data = extractvalue { <4 x float>, i32 } %v, 0 3297 %tfe = extractvalue { <4 x float>, i32 } %v, 1 3298 store i32 %tfe, i32 addrspace(1)* undef 3299 ret <4 x float> %data 3300} 3301 3302declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3303declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3304declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3305declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3306declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3307declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3308declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3309declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3310declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3311declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3312declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3313declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3314declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3315declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3316declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3317declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3318declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3319declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3320declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3321declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3322declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3323declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3324declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3325declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3326declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3327declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3328declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3329declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3330declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3331declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3332declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3333declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3334declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3335declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3336declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3337declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3338declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3339declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3340declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3341declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3342declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3343declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3344declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3345declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3346declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3347 3348attributes #0 = { nounwind } 3349attributes #1 = { nounwind readonly } 3350attributes #2 = { nounwind writeonly } 3351attributes #3 = { nounwind readnone } 3352