; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s

; Legalizer checks for a16 (16-bit coordinate) image load intrinsics.
; Coordinates arrive packed in <2 x i16> arguments; the legalizer unpacks
; them (G_BITCAST + G_LSHR), then repacks: one coord is truncated to s16,
; two coords become a G_BUILD_VECTOR_TRUNC, three coords are padded with
; G_IMPLICIT_DEF and concatenated to <4 x s16>.

; 1D: a single a16 coordinate is truncated straight to s16.
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D: both halves of the packed register are repacked via G_BUILD_VECTOR_TRUNC.
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_2d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D: the third coordinate is padded with G_IMPLICIT_DEF and the pair of
; <2 x s16> vectors is concatenated to <4 x s16>.
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_3d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_3d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Cube: three coordinates (s, t, face/slice), same padding as 3D.
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_cube
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_cube
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D array: two coordinates (s, slice) packed in one register.
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %slice = extractelement <2 x i16> %coords, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D array: three coordinates (s, t, slice), same padding as 3D.
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D MSAA: three coordinates (s, t, fragid).
; NOTE(review): this function's GFX10NSA check block (and the rest of the
; file) is truncated at the end of this chunk; the visible text is
; reproduced as-is and continues in the following chunk.
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_2dmsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:
[[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 495 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 496 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 497 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 498 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 499 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) 500 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 501 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 502 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 503 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 504 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 505 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 506 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 507 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 508main_body: 509 %s = extractelement <2 x i16> %coords_lo, i32 0 510 %t = extractelement <2 x i16> %coords_lo, i32 1 511 %fragid = extractelement <2 x i16> %coords_hi, i32 0 512 %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 513 ret <4 x float> %v 514} 515 516define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 517 ; GFX9-LABEL: name: load_2darraymsaa 518 ; GFX9: bb.1.main_body: 519 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 520 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = 
COPY $sgpr2 521 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 522 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 523 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 524 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 525 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 526 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 527 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 528 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 529 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 530 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 531 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 532 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 533 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 534 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 535 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 536 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 537 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 538 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 539 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 540 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 541 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 542 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 543 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 544 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 545 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 546 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 547 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 548 ; GFX9: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 549 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 550 ; GFX9: $vgpr0 = COPY [[UV]](s32) 551 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 552 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 553 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 554 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 555 ; GFX10NSA-LABEL: name: load_2darraymsaa 556 ; GFX10NSA: bb.1.main_body: 557 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 558 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 559 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 560 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 561 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 562 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 563 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 564 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 565 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 566 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 567 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 568 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 569 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 570 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 571 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 572 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 573 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 574 ; GFX10NSA: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 575 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 576 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 577 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 578 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 579 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 580 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 581 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 582 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 583 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 584 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 585 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 586 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 587 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 588 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 589 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 590 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 591 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 592 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 593main_body: 594 %s = extractelement <2 x i16> %coords_lo, i32 0 595 %t = extractelement <2 x i16> %coords_lo, i32 1 596 %slice = extractelement <2 x i16> %coords_hi, i32 0 597 %fragid = extractelement <2 x i16> %coords_hi, i32 1 598 %v = call <4 x float> 
@llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 599 ret <4 x float> %v 600} 601 602define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 603 ; GFX9-LABEL: name: load_mip_1d 604 ; GFX9: bb.1.main_body: 605 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 606 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 607 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 608 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 609 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 610 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 611 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 612 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 613 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 614 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 615 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 616 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 617 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 618 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 619 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 620 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 621 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 622 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 623 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 624 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) 625 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 626 ; GFX9: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 627 ; GFX9: $vgpr0 = COPY [[UV]](s32) 628 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 629 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 630 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 631 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 632 ; GFX10NSA-LABEL: name: load_mip_1d 633 ; GFX10NSA: bb.1.main_body: 634 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 635 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 636 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 637 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 638 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 639 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 640 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 641 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 642 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 643 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 644 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 645 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 646 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 647 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 648 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 649 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 650 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 651 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 652 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 653 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) 654 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 655 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 656 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 657 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 658 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 659 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 660 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 661main_body: 662 %s = extractelement <2 x i16> %coords, i32 0 663 %mip = extractelement <2 x i16> %coords, i32 1 664 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 665 ret <4 x float> %v 666} 667 668define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 669 ; GFX9-LABEL: name: load_mip_2d 670 ; GFX9: bb.1.main_body: 671 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 672 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 673 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 674 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 675 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 676 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 677 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 678 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 679 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 680 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 681 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 682 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 683 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 684 ; GFX9: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 685 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 686 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 687 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 688 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 689 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 690 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 691 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 692 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 693 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 694 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 695 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 696 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) 697 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 698 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 699 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 700 ; GFX9: $vgpr0 = COPY [[UV]](s32) 701 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 702 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 703 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 704 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 705 ; GFX10NSA-LABEL: name: load_mip_2d 706 ; GFX10NSA: bb.1.main_body: 707 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 708 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 709 ; 
GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 710 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 711 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 712 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 713 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 714 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 715 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 716 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 717 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 718 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 719 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 720 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 721 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 722 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 723 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 724 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 725 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 726 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 727 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 728 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 729 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 730 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 731 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 732 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) 733 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 734 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 735 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 736 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 737 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 738 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 739 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 740 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 741main_body: 742 %s = extractelement <2 x i16> %coords_lo, i32 0 743 %t = extractelement <2 x i16> %coords_lo, i32 1 744 %mip = extractelement <2 x i16> %coords_hi, i32 0 745 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 746 ret <4 x float> %v 747} 748 749define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 750 ; GFX9-LABEL: name: load_mip_3d 751 ; GFX9: bb.1.main_body: 752 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 753 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 754 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 755 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 756 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 757 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 758 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 759 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 760 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 761 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 762 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 763 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 764 ; GFX9: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 765 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 766 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 767 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 768 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 769 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 770 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 771 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 772 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 773 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 774 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 775 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 776 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 777 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 778 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 779 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 780 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 781 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 782 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 783 ; GFX9: $vgpr0 = COPY [[UV]](s32) 784 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 785 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 786 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 787 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 788 ; GFX10NSA-LABEL: name: load_mip_3d 
789 ; GFX10NSA: bb.1.main_body: 790 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 791 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 792 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 793 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 794 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 795 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 796 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 797 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 798 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 799 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 800 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 801 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 802 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 803 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 804 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 805 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 806 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 807 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 808 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 809 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 810 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 811 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 812 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 813 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 814 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 815 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 816 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 817 ; 
GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 818 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 819 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 820 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 821 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 822 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 823 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 824 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 825 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 826main_body: 827 %s = extractelement <2 x i16> %coords_lo, i32 0 828 %t = extractelement <2 x i16> %coords_lo, i32 1 829 %r = extractelement <2 x i16> %coords_hi, i32 0 830 %mip = extractelement <2 x i16> %coords_hi, i32 1 831 %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 832 ret <4 x float> %v 833} 834 835define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 836 ; GFX9-LABEL: name: load_mip_cube 837 ; GFX9: bb.1.main_body: 838 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 839 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 840 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 841 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 842 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 843 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 844 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 845 ; GFX9: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 846 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 847 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 848 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 849 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 850 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 851 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 852 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 853 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 854 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 855 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 856 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 857 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 858 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 859 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 860 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 861 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 862 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 863 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 864 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 865 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 866 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 867 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 868 ; GFX9: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 869 ; GFX9: $vgpr0 = COPY [[UV]](s32) 870 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 871 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 872 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 873 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 874 ; GFX10NSA-LABEL: name: load_mip_cube 875 ; GFX10NSA: bb.1.main_body: 876 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 877 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 878 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 879 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 880 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 881 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 882 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 883 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 884 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 885 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 886 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 887 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 888 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 889 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 890 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 891 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 892 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 893 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 894 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 895 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 896 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 897 ; 
GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 898 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 899 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 900 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 901 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 902 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 903 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 904 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 905 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 906 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 907 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 908 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 909 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 910 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 911 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 912main_body: 913 %s = extractelement <2 x i16> %coords_lo, i32 0 914 %t = extractelement <2 x i16> %coords_lo, i32 1 915 %slice = extractelement <2 x i16> %coords_hi, i32 0 916 %mip = extractelement <2 x i16> %coords_hi, i32 1 917 %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 918 ret <4 x float> %v 919} 920 921define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 922 ; GFX9-LABEL: name: 
load_mip_1darray 923 ; GFX9: bb.1.main_body: 924 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 925 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 926 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 927 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 928 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 929 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 930 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 931 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 932 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 933 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 934 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 935 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 936 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 937 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 938 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 939 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 940 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 941 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 942 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 943 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 944 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 945 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 946 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 947 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 948 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 949 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) 950 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 951 ; 
GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 952 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 953 ; GFX9: $vgpr0 = COPY [[UV]](s32) 954 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 955 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 956 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 957 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 958 ; GFX10NSA-LABEL: name: load_mip_1darray 959 ; GFX10NSA: bb.1.main_body: 960 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 961 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 962 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 963 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 964 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 965 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 966 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 967 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 968 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 969 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 970 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 971 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 972 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 973 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 974 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 975 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 976 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 977 ; GFX10NSA: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 978 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 979 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 980 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 981 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 982 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 983 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 984 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 985 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) 986 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 987 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 988 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 989 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 990 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 991 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 992 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 993 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 994main_body: 995 %s = extractelement <2 x i16> %coords_lo, i32 0 996 %slice = extractelement <2 x i16> %coords_lo, i32 1 997 %mip = extractelement <2 x i16> %coords_hi, i32 0 998 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 999 ret <4 x float> %v 1000} 1001 1002define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> 
%coords_lo, <2 x i16> %coords_hi) { 1003 ; GFX9-LABEL: name: load_mip_2darray 1004 ; GFX9: bb.1.main_body: 1005 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1006 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1007 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1008 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1009 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1010 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1011 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1012 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1013 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1014 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 1015 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 1016 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1017 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1018 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 1019 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1020 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 1021 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1022 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1023 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 1024 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1025 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 1026 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1027 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1028 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1029 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 1030 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1031 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 1032 ; GFX9: 
[[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 1033 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1034 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 1035 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 1036 ; GFX9: $vgpr0 = COPY [[UV]](s32) 1037 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 1038 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 1039 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 1040 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1041 ; GFX10NSA-LABEL: name: load_mip_2darray 1042 ; GFX10NSA: bb.1.main_body: 1043 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1044 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1045 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1046 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1047 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1048 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1049 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1050 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1051 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1052 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 1053 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 1054 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1055 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1056 ; GFX10NSA: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 1057 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1058 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 1059 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1060 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1061 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 1062 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1063 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 1064 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1065 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1066 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1067 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 1068 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1069 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 1070 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 1071 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1072 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 1073 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 1074 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 1075 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 1076 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 1077 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 1078 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 1079main_body: 1080 %s = extractelement <2 x i16> %coords_lo, i32 0 1081 %t = extractelement <2 x i16> %coords_lo, i32 1 1082 %slice = extractelement <2 x i16> %coords_hi, i32 0 1083 %mip = extractelement <2 x i16> %coords_hi, i32 1 1084 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1085 ret <4 x float> %v 1086} 1087 1088define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 1089 ; GFX9-LABEL: name: store_1d 1090 ; GFX9: bb.1.main_body: 1091 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1092 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1093 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1094 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1095 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1096 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1097 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1098 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1099 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1100 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1101 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1102 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1103 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1104 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1105 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1106 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1107 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1108 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1109 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1110 ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 1111 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1112 ; GFX9: S_ENDPGM 0 1113 ; GFX10NSA-LABEL: name: store_1d 1114 ; GFX10NSA: bb.1.main_body: 1115 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1116 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1117 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1118 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1119 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1120 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1121 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1122 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1123 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1124 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1125 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1126 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1127 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1128 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1129 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1130 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1131 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1132 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1133 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1134 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 1135 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1136 ; GFX10NSA: S_ENDPGM 0 1137main_body: 1138 %s = extractelement <2 x i16> %coords, i32 0 1139 call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1140 ret void 1141} 1142 1143define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { 1144 ; GFX9-LABEL: name: store_2d 1145 ; GFX9: bb.1.main_body: 1146 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1147 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1148 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1149 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1150 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1151 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1152 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1153 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1154 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1155 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1156 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1157 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1158 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1159 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1160 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1161 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1162 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1163 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1164 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1165 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1166 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1167 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST1]], [[C1]](s32) 1168 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1169 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1170 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) 1171 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1172 ; GFX9: S_ENDPGM 0 1173 ; GFX10NSA-LABEL: name: store_2d 1174 ; GFX10NSA: bb.1.main_body: 1175 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1176 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1177 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1178 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1179 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1180 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1181 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1182 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1183 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1184 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1185 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1186 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1187 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1188 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1189 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1190 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1191 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1192 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1193 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST]], [[C]](s32) 1194 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1195 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1196 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1197 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1198 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1199 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) 1200 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1201 ; GFX10NSA: S_ENDPGM 0 1202main_body: 1203 %s = extractelement <2 x i16> %coords, i32 0 1204 %t = extractelement <2 x i16> %coords, i32 1 1205 call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) 1206 ret void 1207} 1208 1209define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1210 ; GFX9-LABEL: name: store_3d 1211 ; GFX9: bb.1.main_body: 1212 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1213 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1214 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1215 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1216 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1217 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1218 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1219 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1220 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1221 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1222 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1223 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1224 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1225 ; 
GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1226 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1227 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1228 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1229 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1230 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1231 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1232 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1233 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1234 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1235 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1236 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1237 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1238 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1239 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 1240 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1241 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1242 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) 1243 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1244 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1245 ; GFX9: S_ENDPGM 0 1246 ; GFX10NSA-LABEL: name: store_3d 1247 ; GFX10NSA: bb.1.main_body: 1248 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1249 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1250 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1251 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1252 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1253 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1254 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1255 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1256 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1257 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1258 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1259 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1260 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1261 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1262 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1263 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1264 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1265 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1266 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1267 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1268 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1269 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1270 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1271 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1272 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1273 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1274 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1275 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 1276 ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1277 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1278 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) 1279 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1280 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1281 ; GFX10NSA: S_ENDPGM 0 1282main_body: 1283 %s = extractelement <2 x i16> %coords_lo, i32 0 1284 %t = extractelement <2 x i16> %coords_lo, i32 1 1285 %r = extractelement <2 x i16> %coords_hi, i32 0 1286 call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) 1287 ret void 1288} 1289 1290define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1291 ; GFX9-LABEL: name: store_cube 1292 ; GFX9: bb.1.main_body: 1293 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1294 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1295 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1296 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1297 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1298 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1299 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1300 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1301 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1302 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1303 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1304 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1305 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = 
COPY $vgpr3 1306 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1307 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1308 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1309 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1310 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1311 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1312 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1313 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1314 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1315 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1316 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1317 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1318 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1319 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1320 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 1321 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1322 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 1323 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) 1324 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1325 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1326 ; GFX9: S_ENDPGM 0 1327 ; GFX10NSA-LABEL: name: store_cube 1328 ; GFX10NSA: bb.1.main_body: 1329 ; GFX10NSA: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1330 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1331 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1332 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1333 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1334 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1335 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1336 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1337 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1338 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1339 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1340 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1341 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1342 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1343 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1344 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1345 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1346 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1347 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1348 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1349 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1350 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1351 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1352 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1353 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1354 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1355 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1356 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 
x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 1D-array store with two i16 coords (s, slice) packed in one <2 x i16> arg.
; The legalizer extracts each half of the packed argument (G_LSHR by 0 and
; by 16 on the bitcast s32) and repacks them with G_BUILD_VECTOR_TRUNC into
; the <2 x s16> coordinate operand of G_AMDGPU_INTRIN_IMAGE_STORE.
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %slice = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 2D-array store: three i16 coords (s, t, slice) arrive in two packed
; <2 x i16> args; the odd coordinate count is padded with G_IMPLICIT_DEF and
; the pairs are concatenated into a <4 x s16> coordinate vector.
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL:
name: store_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 2D MSAA store: coords are (s, t, fragid), also an odd count padded to
; <4 x s16> via G_IMPLICIT_DEF exactly like the other 3-coordinate cases.
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata,
<2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_2dmsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %fragid = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 2D-array MSAA store: all four i16 coords (s, t, slice, fragid) are used,
; so both halves of each packed <2 x i16> arg are extracted and no
; G_IMPLICIT_DEF padding is required before the G_CONCAT_VECTORS.
define amdgpu_ps void
@store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_2darraymsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_2darraymsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %fragid = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Mipmapped 1D store: two i16 coords (s, mip) packed in one <2 x i16> arg;
; both halves are extracted and repacked into a single <2 x s16> operand.
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_mip_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_mip_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %mip = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Mipmapped 2D store: three i16 coords (s, t, mip); odd coordinate count is
; padded with G_IMPLICIT_DEF and concatenated into a <4 x s16> vector.
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_mip_2d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ;
GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_mip_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %mip = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Mipmapped 3D store: four i16 coords (s, t, r, mip); both packed <2 x i16>
; args are fully consumed, so no G_IMPLICIT_DEF padding is needed.
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_mip_3d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ;
GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1845 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1846 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1847 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1848 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1849 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1850 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1851 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1852 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1853 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1854 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1855 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1856 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1857 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1858 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1859 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1860 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1861 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1862 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1863 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1864 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 1865 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1866 ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 1867 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) 1868 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1869 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE 
intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1870 ; GFX9: S_ENDPGM 0 1871 ; GFX10NSA-LABEL: name: store_mip_3d 1872 ; GFX10NSA: bb.1.main_body: 1873 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1874 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1875 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1876 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1877 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1878 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1879 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1880 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1881 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1882 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1883 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1884 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1885 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1886 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1887 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1888 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1889 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1890 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1891 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1892 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1893 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1894 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1895 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C1]](s32) 1896 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1897 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1898 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1899 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1900 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1901 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1902 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 1903 ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1904 ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 1905 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) 1906 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1907 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1908 ; GFX10NSA: S_ENDPGM 0 1909main_body: 1910 %s = extractelement <2 x i16> %coords_lo, i32 0 1911 %t = extractelement <2 x i16> %coords_lo, i32 1 1912 %r = extractelement <2 x i16> %coords_hi, i32 0 1913 %mip = extractelement <2 x i16> %coords_hi, i32 1 1914 call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 1915 ret void 1916} 1917 1918define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 1919 ; GFX9-LABEL: name: store_mip_cube 1920 ; GFX9: bb.1.main_body: 1921 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5 1922 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1923 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1924 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1925 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1926 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1927 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1928 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1929 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1930 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1931 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1932 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1933 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1934 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1935 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1936 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1937 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1938 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1939 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1940 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1941 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1942 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1943 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1944 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1945 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1946 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1947 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1948 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1949 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1950 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), 
[[COPY15]](s32) 1951 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1952 ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 1953 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) 1954 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1955 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1956 ; GFX9: S_ENDPGM 0 1957 ; GFX10NSA-LABEL: name: store_mip_cube 1958 ; GFX10NSA: bb.1.main_body: 1959 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1960 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1961 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1962 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1963 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1964 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1965 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1966 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1967 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1968 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1969 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1970 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 1971 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 1972 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 1973 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 1974 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1975 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 1976 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1977 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1978 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 1979 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 1980 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1981 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 1982 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1983 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 1984 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 1985 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 1986 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 1987 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 1988 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 1989 ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 1990 ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 1991 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) 1992 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 1993 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 1994 ; GFX10NSA: S_ENDPGM 0 1995main_body: 1996 %s = extractelement <2 x i16> %coords_lo, i32 0 1997 %t = extractelement <2 x i16> %coords_lo, i32 1 1998 %slice = extractelement <2 x i16> %coords_hi, i32 0 1999 %mip = extractelement <2 x i16> %coords_hi, i32 1 2000 call void 
@llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2001 ret void 2002} 2003 2004define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 2005 ; GFX9-LABEL: name: store_mip_1darray 2006 ; GFX9: bb.1.main_body: 2007 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2008 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2009 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2010 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2011 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2012 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2013 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2014 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2015 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2016 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2017 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2018 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2019 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2020 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2021 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 2022 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2023 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2024 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2025 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2026 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2027 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2028 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 2029 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 2030 ; GFX9: 
[[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 2031 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 2032 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 2033 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 2034 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 2035 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 2036 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 2037 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) 2038 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 2039 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 2040 ; GFX9: S_ENDPGM 0 2041 ; GFX10NSA-LABEL: name: store_mip_1darray 2042 ; GFX10NSA: bb.1.main_body: 2043 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2044 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2045 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2046 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2047 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2048 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2049 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2050 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2051 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2052 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2053 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2054 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2055 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2056 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2057 ; GFX10NSA: 
[[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 2058 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2059 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2060 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2061 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2062 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2063 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2064 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 2065 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 2066 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 2067 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 2068 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 2069 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 2070 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 2071 ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 2072 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 2073 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) 2074 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 2075 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 2076 ; GFX10NSA: S_ENDPGM 0 2077main_body: 2078 %s = extractelement <2 x i16> %coords_lo, i32 0 2079 %slice = extractelement <2 x i16> 
%coords_lo, i32 1 2080 %mip = extractelement <2 x i16> %coords_hi, i32 0 2081 call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2082 ret void 2083} 2084 2085define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 2086 ; GFX9-LABEL: name: store_mip_2darray 2087 ; GFX9: bb.1.main_body: 2088 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2089 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2090 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2091 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2092 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2093 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2094 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2095 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2096 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2097 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2098 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2099 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2100 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2101 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2102 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 2103 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2104 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2105 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2106 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2107 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2108 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2109 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 2110 ; GFX9: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 2111 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 2112 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 2113 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 2114 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 2115 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 2116 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 2117 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 2118 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 2119 ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 2120 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) 2121 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 2122 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 2123 ; GFX9: S_ENDPGM 0 2124 ; GFX10NSA-LABEL: name: store_mip_2darray 2125 ; GFX10NSA: bb.1.main_body: 2126 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2127 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2128 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2129 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2130 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2131 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2132 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2133 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2134 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2135 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2136 ; 
GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 2137 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 2138 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 2139 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 2140 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 2141 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2142 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) 2143 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2144 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2145 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2146 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) 2147 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 2148 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 2149 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 2150 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 2151 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) 2152 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 2153 ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 2154 ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 2155 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) 2156 ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 2157 ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 2158 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) 2159 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 
2160 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") 2161 ; GFX10NSA: S_ENDPGM 0 2162main_body: 2163 %s = extractelement <2 x i16> %coords_lo, i32 0 2164 %t = extractelement <2 x i16> %coords_lo, i32 1 2165 %slice = extractelement <2 x i16> %coords_hi, i32 0 2166 %mip = extractelement <2 x i16> %coords_hi, i32 1 2167 call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2168 ret void 2169} 2170 2171define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2172 ; GFX9-LABEL: name: getresinfo_1d 2173 ; GFX9: bb.1.main_body: 2174 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2175 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2176 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2177 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2178 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2179 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2180 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2181 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2182 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2183 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2184 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2185 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2186 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2187 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2188 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2189 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2190 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2191 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2192 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2193 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2194 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2195 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2196 ; GFX10NSA-LABEL: name: getresinfo_1d 2197 ; GFX10NSA: bb.1.main_body: 2198 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2199 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2200 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2201 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2202 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2203 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2204 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2205 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2206 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2207 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2208 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2209 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2210 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2211 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2212 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2213 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2214 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; getresinfo_2d: resource-info query with a single packed i16 mip argument.
; The autogenerated assertions (update_mir_test_checks.py; do not hand-edit)
; verify the legalizer extracts the low i16 lane of %coords and passes it as
; the s16 operand of G_AMDGPU_INTRIN_IMAGE_LOAD; note getresinfo carries no
; memory operand, unlike the image load/store cases in this file.
define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_2d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; getresinfo_3d: same query for a 3D resource. Its autogenerated assertions
; continue beyond this span.
define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_3d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY
[[UV1]](s32) 2303 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2304 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2305 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2306 ; GFX10NSA-LABEL: name: getresinfo_3d 2307 ; GFX10NSA: bb.1.main_body: 2308 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2309 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2310 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2311 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2312 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2313 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2314 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2315 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2316 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2317 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2318 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2319 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2320 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2321 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2322 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2323 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2324 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2325 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2326 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2327 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2328 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2329 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 
2330main_body: 2331 %mip = extractelement <2 x i16> %coords, i32 0 2332 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2333 ret <4 x float> %v 2334} 2335 2336define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2337 ; GFX9-LABEL: name: getresinfo_cube 2338 ; GFX9: bb.1.main_body: 2339 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2340 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2341 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2342 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2343 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2344 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2345 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2346 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2347 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2348 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2349 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2350 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2351 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2352 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2353 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2354 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2355 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2356 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2357 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2358 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2359 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2360 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3 2361 ; GFX10NSA-LABEL: name: getresinfo_cube 2362 ; GFX10NSA: bb.1.main_body: 2363 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2364 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2365 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2366 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2367 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2368 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2369 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2370 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2371 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2372 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2373 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2374 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2375 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2376 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2377 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2378 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2379 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2380 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2381 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2382 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2383 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2384 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2385main_body: 2386 %mip = extractelement <2 x i16> %coords, i32 0 2387 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> 
%rsrc, i32 0, i32 0) 2388 ret <4 x float> %v 2389} 2390 2391define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2392 ; GFX9-LABEL: name: getresinfo_1darray 2393 ; GFX9: bb.1.main_body: 2394 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2395 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2396 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2397 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2398 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2399 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2400 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2401 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2402 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2403 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2404 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2405 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2406 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2407 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2408 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2409 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2410 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2411 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2412 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2413 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2414 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2415 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2416 ; GFX10NSA-LABEL: name: getresinfo_1darray 2417 ; GFX10NSA: bb.1.main_body: 2418 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2419 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2420 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2421 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2422 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2423 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2424 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2425 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2426 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2427 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2428 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2429 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2430 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2431 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2432 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2433 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2434 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2435 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2436 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2437 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2438 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2439 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2440main_body: 2441 %mip = extractelement <2 x i16> %coords, i32 0 2442 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2443 ret <4 x float> %v 2444} 2445 2446define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> 
%coords) { 2447 ; GFX9-LABEL: name: getresinfo_2darray 2448 ; GFX9: bb.1.main_body: 2449 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2450 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2451 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2452 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2453 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2454 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2455 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2456 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2457 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2458 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2459 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2460 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2461 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2462 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2463 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2464 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2465 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2466 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2467 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2468 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2469 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2470 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2471 ; GFX10NSA-LABEL: name: getresinfo_2darray 2472 ; GFX10NSA: bb.1.main_body: 2473 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2474 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2475 ; GFX10NSA: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2476 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2477 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2478 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2479 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2480 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2481 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2482 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2483 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2484 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2485 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2486 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2487 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2488 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2489 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2490 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2491 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2492 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2493 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2494 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2495main_body: 2496 %mip = extractelement <2 x i16> %coords, i32 0 2497 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2498 ret <4 x float> %v 2499} 2500 2501define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2502 ; GFX9-LABEL: name: getresinfo_2dmsaa 2503 ; GFX9: bb.1.main_body: 2504 ; GFX9: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2505 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2506 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2507 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2508 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2509 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2510 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2511 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2512 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2513 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2514 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2515 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2516 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2517 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2518 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2519 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2520 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2521 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2522 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2523 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2524 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2525 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2526 ; GFX10NSA-LABEL: name: getresinfo_2dmsaa 2527 ; GFX10NSA: bb.1.main_body: 2528 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2529 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2530 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2531 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2532 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY 
$sgpr5 2533 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2534 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2535 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2536 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2537 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2538 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2539 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2540 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2541 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2542 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2543 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2544 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2545 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2546 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2547 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2548 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2549 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2550main_body: 2551 %mip = extractelement <2 x i16> %coords, i32 0 2552 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2553 ret <4 x float> %v 2554} 2555 2556define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2557 ; GFX9-LABEL: name: getresinfo_2darraymsaa 2558 ; GFX9: bb.1.main_body: 2559 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2560 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2561 ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2562 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2563 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2564 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2565 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2566 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2567 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2568 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2569 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2570 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2571 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2572 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2573 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2574 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2575 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2576 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2577 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2578 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 2579 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 2580 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2581 ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa 2582 ; GFX10NSA: bb.1.main_body: 2583 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2584 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2585 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2586 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2587 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2588 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2589 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY 
$sgpr7 2590 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2591 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2592 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2593 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2594 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2595 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2596 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2597 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2598 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 2599 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 2600 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 2601 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2602 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 2603 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 2604 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 2605main_body: 2606 %mip = extractelement <2 x i16> %coords, i32 0 2607 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) 2608 ret <4 x float> %v 2609} 2610 2611define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2612 ; GFX9-LABEL: name: load_1d_V1 2613 ; GFX9: bb.1.main_body: 2614 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2615 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2616 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2617 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2618 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2619 ; GFX9: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2620 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2621 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2622 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2623 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2624 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2625 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2626 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2627 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2628 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2629 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8") 2630 ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 2631 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 2632 ; GFX10NSA-LABEL: name: load_1d_V1 2633 ; GFX10NSA: bb.1.main_body: 2634 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2635 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2636 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2637 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2638 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2639 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2640 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2641 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2642 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2643 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2644 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2645 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2646 ; 
GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2647 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2648 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2649 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8") 2650 ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 2651 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 2652main_body: 2653 %s = extractelement <2 x i16> %coords, i32 0 2654 %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2655 ret float %v 2656} 2657 2658define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) { 2659 ; GFX9-LABEL: name: load_1d_V2 2660 ; GFX9: bb.1.main_body: 2661 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2662 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2663 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2664 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2665 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2666 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2667 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2668 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2669 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2670 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2671 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2672 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2673 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2674 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2675 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2676 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) 
= G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8") 2677 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 2678 ; GFX9: $vgpr0 = COPY [[UV]](s32) 2679 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 2680 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 2681 ; GFX10NSA-LABEL: name: load_1d_V2 2682 ; GFX10NSA: bb.1.main_body: 2683 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 2684 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2685 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2686 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2687 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2688 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2689 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2690 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2691 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2692 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 2693 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2694 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2695 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 2696 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2697 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2698 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8") 2699 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 2700 ; GFX10NSA: $vgpr0 = COPY 
[[UV]](s32) 2701 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 2702 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 2703main_body: 2704 %s = extractelement <2 x i16> %coords, i32 0 2705 %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2706 ret <2 x float> %v 2707} 2708 2709define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) { 2710 ; GFX9-LABEL: name: store_1d_V1 2711 ; GFX9: bb.1.main_body: 2712 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 2713 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2714 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2715 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2716 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2717 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2718 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2719 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2720 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2721 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2722 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 2723 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2724 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2725 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 2726 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2727 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2728 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8") 2729 ; GFX9: S_ENDPGM 0 2730 ; GFX10NSA-LABEL: name: store_1d_V1 2731 ; GFX10NSA: bb.1.main_body: 2732 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 2733 ; 
GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2734 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2735 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2736 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2737 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 2738 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 2739 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 2740 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 2741 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 2742 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 2743 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 2744 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 2745 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 2746 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 2747 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 2748 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8") 2749 ; GFX10NSA: S_ENDPGM 0 2750main_body: 2751 %s = extractelement <2 x i16> %coords, i32 0 2752 call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 2753 ret void 2754} 2755 2756define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) { 2757 ; GFX9-LABEL: name: store_1d_V2 2758 ; GFX9: bb.1.main_body: 2759 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 2760 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 2761 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 2762 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 2763 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 2764 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = 
COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_V2
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Same as load_1d but with cachepolicy operand 1 (bit 0 set in the final i32 argument).
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_glc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_glc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

; Same as load_1d but with cachepolicy operand 2 (bit 1 set in the final i32 argument).
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_slc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

; Same as load_1d but with cachepolicy operand 3 (bits 0 and 1 set in the final i32 argument).
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_glc_slc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_glc_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

; v4f32 store variant with cachepolicy operand 1 (final i32 argument of the call).
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_glc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_glc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

; v4f32 store variant with cachepolicy operand 2 (final i32 argument of the call).
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_slc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

; v4f32 store variant with cachepolicy operand 3 (final i32 argument of the call).
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_glc_slc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX9: S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_glc_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
  ; GFX10NSA: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

; dmask=0: the image op is folded away entirely and an undef result is returned.
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_dmask0
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_dmask0
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}

; TFE load: the struct-returning intrinsic yields a fifth (status) element, stored to undef.
define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_tfe
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %data
}

; 2D TFE load: both i16 coordinates are repacked into a single <2 x s16> operand.
define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_2d_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX9: $vgpr0 = COPY [[UV]](s32)
  ; GFX9: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_2d_tfe
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %data
}

define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_3d_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) 3336 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3337 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3338 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3339 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3340 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3341 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 3342 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 3343 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 3344 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 3345 ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 3346 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32) 3347 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3348 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 3349 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3350 ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 3351 ; GFX9: $vgpr0 = COPY [[UV]](s32) 3352 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 3353 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 3354 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 3355 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3356 ; GFX10NSA-LABEL: name: load_3d_tfe 3357 ; GFX10NSA: bb.1.main_body: 3358 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3359 ; GFX10NSA: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3360 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3361 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3362 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3363 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3364 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3365 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3366 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3367 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3368 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3369 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3370 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3371 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3372 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3373 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3374 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3375 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3376 ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3377 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3378 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3379 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 3380 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 3381 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 3382 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 3383 ; GFX10NSA: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 3384 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32) 3385 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), 
[[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3386 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 3387 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3388 ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 3389 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 3390 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 3391 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 3392 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 3393 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3394main_body: 3395 %s = extractelement <2 x i16> %coords_lo, i32 0 3396 %t = extractelement <2 x i16> %coords_lo, i32 1 3397 %r = extractelement <2 x i16> %coords_hi, i32 0 3398 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) 3399 %data = extractvalue { <4 x float>, i32 } %v, 0 3400 %tfe = extractvalue { <4 x float>, i32 } %v, 1 3401 store i32 %tfe, i32 addrspace(1)* undef 3402 ret <4 x float> %data 3403} 3404 3405define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { 3406 ; GFX9-LABEL: name: load_2darraymsaa_tfe 3407 ; GFX9: bb.1.main_body: 3408 ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3409 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3410 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3411 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3412 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3413 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3414 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = 
COPY $sgpr7 3415 ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3416 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3417 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3418 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3419 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3420 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3421 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3422 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3423 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3424 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3425 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3426 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3427 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3428 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3429 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3430 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 3431 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 3432 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 3433 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 3434 ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 3435 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 3436 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 3437 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3438 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 
x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 3439 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3440 ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 3441 ; GFX9: $vgpr0 = COPY [[UV]](s32) 3442 ; GFX9: $vgpr1 = COPY [[UV1]](s32) 3443 ; GFX9: $vgpr2 = COPY [[UV2]](s32) 3444 ; GFX9: $vgpr3 = COPY [[UV3]](s32) 3445 ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3446 ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe 3447 ; GFX10NSA: bb.1.main_body: 3448 ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 3449 ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 3450 ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 3451 ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 3452 ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 3453 ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 3454 ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 3455 ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 3456 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 3457 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 3458 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 3459 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 3460 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 3461 ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 3462 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3463 ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 3464 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) 3465 ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 3466 ; 
GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) 3467 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3468 ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 3469 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) 3470 ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) 3471 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) 3472 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) 3473 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) 3474 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 3475 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) 3476 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) 3477 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) 3478 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") 3479 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 3480 ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 3481 ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) 3482 ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) 3483 ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) 3484 ; GFX10NSA: $vgpr3 = COPY [[UV3]](s32) 3485 ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 3486main_body: 3487 %s = extractelement <2 x i16> %coords_lo, i32 0 3488 %t = extractelement <2 x i16> 
%coords_lo, i32 1 3489 %slice = extractelement <2 x i16> %coords_hi, i32 0 3490 %fragid = extractelement <2 x i16> %coords_hi, i32 1 3491 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) 3492 %data = extractvalue { <4 x float>, i32 } %v, 0 3493 %tfe = extractvalue { <4 x float>, i32 } %v, 1 3494 store i32 %tfe, i32 addrspace(1)* undef 3495 ret <4 x float> %data 3496} 3497 3498declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3499declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3500declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3501declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3502declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3503declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3504declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3505declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3506declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3507declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3508declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3509declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 
x i32>, i32 immarg, i32 immarg) #1 3510declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3511declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3512declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3513declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3514declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3515declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3516declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3517declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3518declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3519declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3520declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3521declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3522declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3523declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3524declare void 
@llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3525declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3526declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3527declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3528declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3529declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3530declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3531declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3532declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3533declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3 3534declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3535declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3536declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3537declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3538declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2 3539declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3540declare { <4 x 
float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3541declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3542declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 3543 3544attributes #0 = { nounwind } 3545attributes #1 = { nounwind readonly } 3546attributes #2 = { nounwind writeonly } 3547attributes #3 = { nounwind readnone } 3548