1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s 4 5define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 6 ; UNPACKED-LABEL: name: image_load_f16 7 ; UNPACKED: bb.1 (%ir-block.0): 8 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 9 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 10 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 11 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 12 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 13 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 14 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 15 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 16 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 17 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 18 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 19 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 20 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 21 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 22 ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) 23 ; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32) 24 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 25 ; PACKED-LABEL: name: image_load_f16 26 ; PACKED: bb.1 (%ir-block.0): 27 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 28 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 29 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 30 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 31 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 32 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 33 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 34 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 35 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 36 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 37 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 38 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 39 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 40 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 41 ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) 42 ; PACKED: $vgpr0 = COPY [[ANYEXT]](s32) 43 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 44 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 45 ret half %tex 46} 47 48define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 49 ; UNPACKED-LABEL: name: 
image_load_v2f16 50 ; UNPACKED: bb.1 (%ir-block.0): 51 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 52 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 53 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 54 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 55 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 56 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 57 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 58 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 59 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 60 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 61 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 62 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 63 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 64 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 65 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 66 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 67 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 68 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 69 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 70 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 71 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 72 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 73 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 74 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 75 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 76 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 77 ; PACKED-LABEL: name: image_load_v2f16 78 ; PACKED: bb.1 (%ir-block.0): 79 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 80 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 81 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 82 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 83 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 84 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 85 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 86 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 87 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 88 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 89 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 90 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 91 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 92 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 93 ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) 94 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 95 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 96 ret <2 x half> 
%tex 97} 98 99define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 100 ; UNPACKED-LABEL: name: image_load_v3f16 101 ; UNPACKED: bb.1 (%ir-block.0): 102 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 103 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 104 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 105 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 106 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 107 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 108 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 109 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 110 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 111 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 112 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 113 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 114 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 115 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 116 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 117 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 118 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 119 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 120 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 121 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 122 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 123 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 124 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 125 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 126 ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) 127 ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] 128 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 129 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 130 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 131 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 132 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 133 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 134 ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 135 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 136 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 137 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 138 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 139 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 140 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 141 ; PACKED-LABEL: name: image_load_v3f16 142 ; PACKED: bb.1 (%ir-block.0): 143 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 144 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 145 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 146 ; PACKED: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 147 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 148 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 149 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 150 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 151 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 152 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 153 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 154 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 155 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 156 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 157 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) 158 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 159 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[DEF]](<2 x s16>) 160 ; PACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 161 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 162 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 163 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 164 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 165 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 166 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 167 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 168 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 169 ret <3 x half> %tex 170} 171 172define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 173 ; UNPACKED-LABEL: name: image_load_v4f16 174 ; UNPACKED: bb.1 (%ir-block.0): 175 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 176 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 177 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 178 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 179 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 180 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 181 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 182 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 183 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 184 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 185 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 186 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 187 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 188 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8") 189 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 190 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 191 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 192 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 193 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 194 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 195 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 196 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 197 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 198 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 199 ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) 200 ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] 201 ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) 202 ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] 203 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) 204 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 205 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 206 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 207 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 208 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 209 ; PACKED-LABEL: name: image_load_v4f16 210 ; PACKED: bb.1 (%ir-block.0): 211 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 212 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 213 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 214 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 215 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 216 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 217 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 218 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 219 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 220 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 221 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 222 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 223 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 224 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8") 225 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) 226 ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) 227 ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) 228 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 229 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 230 ret <4 x half> %tex 231} 232 233define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 234 ; UNPACKED-LABEL: name: image_load_tfe_f16 235 ; UNPACKED: bb.1 (%ir-block.0): 236 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 237 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 238 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 239 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 240 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 241 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 242 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 243 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 244 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 245 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 246 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 247 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 248 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 249 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 250 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 251 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 252 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 253 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 254 ; UNPACKED: $vgpr0 = COPY [[COPY10]](s32) 255 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 256 ; PACKED-LABEL: name: image_load_tfe_f16 257 ; PACKED: bb.1 (%ir-block.0): 258 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 259 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 260 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 261 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 262 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 263 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 264 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 265 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 266 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 267 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 268 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 269 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 270 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 271 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 272 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 273 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 274 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 275 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 276 ; PACKED: $vgpr0 = COPY [[COPY10]](s32) 277 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 278 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 279 %tex = extractvalue { half, i32 } %res, 0 280 %tfe = extractvalue { half, i32 } %res, 1 281 store i32 %tfe, i32 addrspace(1)* undef 282 ret half %tex 283} 284 285define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 286 ; UNPACKED-LABEL: name: image_load_tfe_v2f16 287 ; UNPACKED: bb.1 (%ir-block.0): 288 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, 
$vgpr0, $vgpr1 289 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 290 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 291 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 292 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 293 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 294 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 295 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 296 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 297 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 298 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 299 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 300 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 301 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 302 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 303 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 304 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 305 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 306 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 307 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 308 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 309 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 310 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 311 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 312 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 313 ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 314 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 315 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 316 ; PACKED-LABEL: name: image_load_tfe_v2f16 317 ; PACKED: bb.1 (%ir-block.0): 318 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 319 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 320 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 321 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 322 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 323 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 324 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 325 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 326 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 327 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 328 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 329 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 330 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 331 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 332 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 333 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 334 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 335 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 336 ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 337 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 338 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 339 %tex = extractvalue { <2 x half>, i32 } %res, 0 340 %tfe = extractvalue { <2 x half>, i32 } %res, 1 341 store i32 %tfe, i32 addrspace(1)* undef 342 ret <2 x half> %tex 343} 344 345define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 346 ; UNPACKED-LABEL: name: image_load_tfe_v3f16 347 ; UNPACKED: bb.1 (%ir-block.0): 348 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 349 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 350 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 351 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 352 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 353 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 354 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 355 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 356 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 357 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 358 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 359 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 360 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 361 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 362 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 363 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 364 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 365 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 366 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 367 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 368 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 369 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 370 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 371 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 372 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 373 ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) 374 ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] 375 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 376 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 377 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 378 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 379 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 380 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 381 ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 
1) 382 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 383 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 384 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 385 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 386 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 387 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 388 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 389 ; PACKED-LABEL: name: image_load_tfe_v3f16 390 ; PACKED: bb.1 (%ir-block.0): 391 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 392 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 393 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 394 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 395 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 396 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 397 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 398 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 399 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 400 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 401 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 402 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 403 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 404 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 405 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 406 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 407 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 408 ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) 409 ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 410 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>) 411 ; PACKED: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 412 ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 413 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 414 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 415 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 416 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 417 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 418 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 419 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 420 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 421 %tex = extractvalue { <3 x half>, i32 } %res, 0 422 %tfe = extractvalue { <3 x half>, i32 } %res, 1 423 store i32 %tfe, i32 addrspace(1)* undef 424 ret <3 x half> %tex 425} 426 427define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 428 ; UNPACKED-LABEL: name: image_load_tfe_v4f16 429 ; UNPACKED: bb.1 (%ir-block.0): 430 
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 431 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 432 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 433 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 434 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 435 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 436 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 437 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 438 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 439 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 440 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 441 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 442 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 443 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 444 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8") 445 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) 446 ; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 447 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 448 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 449 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 450 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 451 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 452 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 453 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 454 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 455 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 456 ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) 457 ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] 458 ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) 459 ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] 460 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) 461 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 462 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 463 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 464 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 465 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 466 ; PACKED-LABEL: name: image_load_tfe_v4f16 467 ; PACKED: bb.1 (%ir-block.0): 468 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 469 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 470 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 471 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 472 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 473 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 474 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 475 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 476 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 477 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 478 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 479 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) 
= G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 480 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 481 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 482 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8") 483 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 484 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 485 ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) 486 ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 487 ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 488 ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 489 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 490 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 491 %tex = extractvalue { <4 x half>, i32 } %res, 0 492 %tfe = extractvalue { <4 x half>, i32 } %res, 1 493 store i32 %tfe, i32 addrspace(1)* undef 494 ret <4 x half> %tex 495} 496 497define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 498 ; UNPACKED-LABEL: name: image_load_f16_dmask_0000 499 ; UNPACKED: bb.1 (%ir-block.0): 500 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 501 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 502 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 503 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 504 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 505 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 506 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 507 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 508 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 509 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 510 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 511 ; UNPACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 512 ; UNPACKED: $vgpr0 = COPY [[DEF]](s32) 513 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 514 ; PACKED-LABEL: name: image_load_f16_dmask_0000 515 ; PACKED: bb.1 (%ir-block.0): 516 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 517 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 518 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 519 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 520 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 521 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 522 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 523 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 524 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 525 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 526 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 527 ; PACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 528 ; PACKED: $vgpr0 = COPY [[DEF]](s32) 529 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 530 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 531 ret half %tex 532} 533 534define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 535 ; UNPACKED-LABEL: 
name: image_load_v2f16_dmask_1000 536 ; UNPACKED: bb.1 (%ir-block.0): 537 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 538 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 539 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 540 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 541 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 542 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 543 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 544 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 545 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 546 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 547 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 548 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 549 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 550 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 551 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 552 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 553 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 554 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 555 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 556 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 557 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 558 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 559 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 560 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 561 ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 562 ; PACKED: bb.1 (%ir-block.0): 563 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 564 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 565 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 566 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 567 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 568 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 569 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 570 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 571 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 572 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 573 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 574 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 575 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 576 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 577 ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) 578 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 579 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 580 ret <2 x half> %tex 581} 582 583define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 
%s, i32 %t) { 584 ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000 585 ; UNPACKED: bb.1 (%ir-block.0): 586 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 587 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 588 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 589 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 590 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 591 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 592 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 593 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 594 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 595 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 596 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 597 ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 598 ; UNPACKED: $vgpr0 = COPY [[DEF]](<2 x s16>) 599 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 600 ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 601 ; PACKED: bb.1 (%ir-block.0): 602 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 603 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 604 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 605 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 606 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 607 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 608 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 609 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 610 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 611 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 612 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 613 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 614 ; PACKED: $vgpr0 = COPY [[DEF]](<2 x s16>) 615 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 616 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 617 ret <2 x half> %tex 618} 619 620define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 621 ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100 622 ; UNPACKED: bb.1 (%ir-block.0): 623 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 624 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 625 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 626 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 627 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 628 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 629 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 630 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 631 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 632 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 633 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 634 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 635 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 636 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 637 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 638 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 639 ; 
UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 640 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 641 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 642 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 643 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 644 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 645 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 646 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 647 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 648 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 649 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] 650 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 651 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 652 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 653 ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 654 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 655 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 656 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 657 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 658 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 659 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 660 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 661 ; PACKED: bb.1 (%ir-block.0): 662 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 663 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 664 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 665 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 666 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 667 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 668 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 669 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 670 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 671 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 672 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 673 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 674 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 675 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 676 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 677 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) 678 ; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 679 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 680 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 681 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 682 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 683 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 684 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 685 ; PACKED: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 686 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 687 ret <3 x half> %tex 688} 689 690define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 691 ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000 692 ; UNPACKED: bb.1 (%ir-block.0): 693 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 694 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 695 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 696 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 697 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 698 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 699 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 700 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 701 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 702 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 703 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 704 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 705 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 706 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 707 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 708 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 709 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 710 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 711 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 712 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 713 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 714 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 715 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] 716 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 717 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 718 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 719 ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 720 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 721 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 722 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 723 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 724 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 725 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 726 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 727 ; PACKED: bb.1 (%ir-block.0): 728 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 729 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 730 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 731 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 732 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 733 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 734 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 735 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$sgpr8 736 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 737 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 738 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 739 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 740 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 741 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 742 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 743 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) 744 ; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 745 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 746 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 747 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 748 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 749 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 750 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 751 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 752 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 753 ret <3 x half> %tex 754} 755 756define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 757 ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000 758 ; UNPACKED: bb.1 (%ir-block.0): 759 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 760 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 761 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 762 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 763 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 764 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 765 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 766 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 767 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 768 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 769 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 770 ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 771 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 772 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 773 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 774 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 775 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 776 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 777 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 778 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 779 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 780 ; PACKED: bb.1 (%ir-block.0): 781 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 782 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 783 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 784 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 785 
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 786 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 787 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 788 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 789 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 790 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 791 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 792 ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 793 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 794 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 795 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 796 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 797 ; PACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 798 ; PACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 799 ; PACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 800 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 801 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 802 ret <3 x half> %tex 803} 804 805define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 806 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110 807 ; UNPACKED: bb.1 (%ir-block.0): 808 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 809 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 810 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 811 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 812 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 813 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 814 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 815 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 816 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 817 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 818 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 819 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 820 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 821 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 822 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 823 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 824 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 825 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 826 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 827 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 828 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 829 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 830 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 831 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 832 ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) 833 ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] 834 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 835 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 
836 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 837 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 838 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 839 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 840 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 841 ; PACKED-LABEL: name: image_load_v4f16_dmask_1110 842 ; PACKED: bb.1 (%ir-block.0): 843 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 844 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 845 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 846 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 847 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 848 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 849 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 850 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 851 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 852 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 853 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 854 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 855 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 856 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 857 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) 858 ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) 859 ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) 860 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 861 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 862 ret <4 x half> %tex 863} 864 865define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 866 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100 867 ; UNPACKED: bb.1 (%ir-block.0): 868 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 869 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 870 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 871 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 872 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 873 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 874 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 875 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 876 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 877 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 878 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 879 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 880 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 881 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 882 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 883 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 884 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 885 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 886 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 887 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 888 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 889 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 890 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 891 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 892 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 893 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 894 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] 895 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 896 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 897 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 898 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 899 ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 900 ; PACKED: bb.1 (%ir-block.0): 901 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 902 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 903 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 904 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 905 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 906 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 907 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 908 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 909 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 910 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 911 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 912 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 913 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 914 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 915 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 916 ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) 917 ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) 918 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 919 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 920 ret <4 x half> %tex 921} 922 923define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 924 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000 925 ; UNPACKED: bb.1 (%ir-block.0): 926 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 927 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 928 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 929 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 930 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 931 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 932 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 933 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 934 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 935 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 936 ; UNPACKED: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 937 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 938 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 939 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 940 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 941 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) 942 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 943 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 944 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 945 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 946 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 947 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 948 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] 949 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 950 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 951 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 952 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 953 ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 954 ; PACKED: bb.1 (%ir-block.0): 955 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 956 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 957 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 958 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 959 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 960 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 961 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 962 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 963 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 964 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 965 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 966 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 967 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 968 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 969 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 970 ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) 971 ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) 972 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 973 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 974 ret <4 x half> %tex 975} 976 977define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 978 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000 979 ; UNPACKED: bb.1 (%ir-block.0): 980 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 981 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 982 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 983 ; UNPACKED: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 984 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 985 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 986 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 987 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 988 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 989 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 990 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 991 ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 992 ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) 993 ; UNPACKED: $vgpr0 = COPY [[UV]](<2 x s16>) 994 ; UNPACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) 995 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 996 ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 997 ; PACKED: bb.1 (%ir-block.0): 998 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 999 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1000 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1001 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1002 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1003 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1004 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1005 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1006 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1007 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1008 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1009 ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1010 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) 1011 ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) 1012 ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) 1013 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1014 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 1015 ret <4 x half> %tex 1016} 1017 1018define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1019 ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000 1020 ; UNPACKED: bb.1 (%ir-block.0): 1021 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1022 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1023 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1024 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1025 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1026 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1027 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1028 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1029 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1030 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1031 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1032 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1033 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1034 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1035 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1036 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1037 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1038 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1039 ; UNPACKED: $vgpr0 = COPY [[COPY10]](s32) 1040 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 1041 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 1042 ; PACKED: bb.1 (%ir-block.0): 1043 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1044 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1045 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1046 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1047 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1048 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1049 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1050 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1051 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1052 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1053 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1054 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1055 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1056 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1057 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1058 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1059 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1060 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1061 ; PACKED: $vgpr0 = COPY [[COPY10]](s32) 1062 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 1063 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1064 %tex = extractvalue { half, i32 } %res, 0 1065 %tfe = extractvalue { half, i32 } %res, 1 1066 store i32 %tfe, i32 addrspace(1)* undef 1067 ret half %tex 1068} 1069 1070define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1071 ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 1072 ; UNPACKED: bb.1 (%ir-block.0): 1073 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1074 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1075 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1076 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1077 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1078 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1079 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1080 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1081 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1082 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1083 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1084 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1085 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1086 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), 
[[COPY9]](s32) 1087 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1088 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1089 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1090 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1091 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1092 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1093 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1094 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 1095 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1096 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1097 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1098 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1099 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 1100 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 1101 ; PACKED: bb.1 (%ir-block.0): 1102 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1103 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1104 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1105 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1106 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1107 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1108 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1109 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1110 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1111 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1112 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1113 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1114 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1115 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1116 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1117 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1118 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 1119 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1120 ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1121 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 1122 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1123 %tex = extractvalue { <2 x half>, i32 } %res, 0 1124 %tfe = extractvalue { <2 x half>, i32 } %res, 1 1125 store i32 %tfe, i32 addrspace(1)* undef 1126 ret <2 x half> %tex 1127} 1128 1129define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1130 ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 1131 ; UNPACKED: bb.1 (%ir-block.0): 1132 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1133 ; UNPACKED: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1134 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1135 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1136 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1137 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1138 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1139 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1140 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1141 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1142 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1143 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1144 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1145 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1146 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1147 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1148 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1149 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1150 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1151 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1152 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1153 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 1154 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1155 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1156 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1157 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1158 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 1159 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 1160 ; PACKED: bb.1 (%ir-block.0): 1161 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1162 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1163 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1164 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1165 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1166 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1167 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1168 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1169 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1170 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1171 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1172 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1173 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1174 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1175 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1176 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1177 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[UV]](s32) 1178 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1179 ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1180 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 1181 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1182 %tex = extractvalue { <2 x half>, i32 } %res, 0 1183 %tfe = extractvalue { <2 x half>, i32 } %res, 1 1184 store i32 %tfe, i32 addrspace(1)* undef 1185 ret <2 x half> %tex 1186} 1187 1188define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1189 ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 1190 ; UNPACKED: bb.1 (%ir-block.0): 1191 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1192 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1193 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1194 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1195 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1196 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1197 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1198 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1199 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1200 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1201 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1202 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1203 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1204 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1205 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 1206 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 1207 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1208 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1209 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1210 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 1211 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 1212 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1213 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 1214 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1215 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1216 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1217 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 1218 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] 1219 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 1220 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 1221 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 1222 ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1223 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1224 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 1225 ; UNPACKED: 
[[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 1226 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 1227 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 1228 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 1229 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1230 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 1231 ; PACKED: bb.1 (%ir-block.0): 1232 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1233 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1234 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1235 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1236 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1237 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1238 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1239 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1240 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1241 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1242 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1243 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1244 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1245 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1246 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 1247 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1248 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 1249 ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 1250 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) 1251 ; PACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 1252 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1253 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1254 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 1255 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 1256 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 1257 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 1258 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 1259 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1260 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1261 %tex = extractvalue { <3 x half>, i32 } %res, 0 1262 %tfe = extractvalue { <3 x half>, i32 } %res, 1 1263 store i32 %tfe, i32 addrspace(1)* undef 1264 ret <3 x half> %tex 1265} 1266 1267define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1268 ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 1269 ; UNPACKED: bb.1 (%ir-block.0): 1270 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1271 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1272 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$sgpr3 1273 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1274 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1275 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1276 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1277 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1278 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1279 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1280 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1281 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1282 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1283 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1284 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1285 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1286 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1287 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1288 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1289 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1290 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1291 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 1292 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1293 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1294 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] 1295 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 1296 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 1297 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 1298 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1299 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1300 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 1301 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 1302 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 1303 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 1304 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 1305 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1306 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 1307 ; PACKED: bb.1 (%ir-block.0): 1308 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1309 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1310 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1311 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1312 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1313 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1314 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1315 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1316 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1317 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1318 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1319 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1320 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1321 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1322 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1323 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1324 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 1325 ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 1326 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) 1327 ; PACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 1328 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1329 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1330 ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 1331 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 1332 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 1333 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 1334 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 1335 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1336 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1337 %tex = extractvalue { <3 x half>, i32 } %res, 0 1338 %tfe = extractvalue { <3 x half>, i32 } %res, 1 1339 store i32 %tfe, i32 addrspace(1)* undef 1340 ret <3 x half> %tex 1341} 1342 1343define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1344 ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 1345 ; UNPACKED: bb.1 (%ir-block.0): 1346 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1347 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1348 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1349 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1350 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1351 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1352 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1353 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1354 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1355 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1356 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1357 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1358 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1359 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1360 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1361 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x 
s32>) 1362 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1363 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1364 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1365 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1366 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1367 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) 1368 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1369 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1370 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] 1371 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 1372 ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) 1373 ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 1374 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1375 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1376 ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 1377 ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 1378 ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 1379 ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) 1380 ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) 1381 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1382 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 1383 ; PACKED: bb.1 (%ir-block.0): 1384 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1385 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1386 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1387 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1388 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1389 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1390 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1391 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1392 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1393 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1394 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1395 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1396 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1397 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1398 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8") 1399 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1400 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 1401 ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 1402 ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) 1403 ; PACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) 1404 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1405 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 1406 ; PACKED: 
[[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 1407 ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 1408 ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 1409 ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) 1410 ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) 1411 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1412 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1413 %tex = extractvalue { <3 x half>, i32 } %res, 0 1414 %tfe = extractvalue { <3 x half>, i32 } %res, 1 1415 store i32 %tfe, i32 addrspace(1)* undef 1416 ret <3 x half> %tex 1417} 1418 1419define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1420 ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 1421 ; UNPACKED: bb.1 (%ir-block.0): 1422 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1423 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1424 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1425 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1426 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1427 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1428 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1429 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1430 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1431 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1432 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1433 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1434 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1435 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1436 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 1437 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) 1438 ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1439 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1440 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1441 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1442 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 1443 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 1444 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1445 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 1446 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1447 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1448 ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) 1449 ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] 1450 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1451 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 1452 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 1453 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 1454 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 
x s16>) 1455 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 1456 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1457 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 1458 ; PACKED: bb.1 (%ir-block.0): 1459 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1460 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1461 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1462 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1463 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1464 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1465 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1466 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1467 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1468 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1469 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1470 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1471 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1472 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1473 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8) 1474 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 1475 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 1476 ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) 1477 ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1478 ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1479 ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 1480 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1481 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) 1482 %tex = extractvalue { <4 x half>, i32 } %res, 0 1483 %tfe = extractvalue { <4 x half>, i32 } %res, 1 1484 store i32 %tfe, i32 addrspace(1)* undef 1485 ret <4 x half> %tex 1486} 1487 1488define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { 1489 ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 1490 ; UNPACKED: bb.1 (%ir-block.0): 1491 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1492 ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1493 ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1494 ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1495 ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1496 ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1497 ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1498 ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1499 ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1500 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1501 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1502 ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1503 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1504 ; UNPACKED: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1505 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 1506 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) 1507 ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1508 ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 1509 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) 1510 ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] 1511 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) 1512 ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] 1513 ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 1514 ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) 1515 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] 1516 ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 1517 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 1518 ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) 1519 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] 1520 ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 1521 ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1522 ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) 1523 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1524 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 1525 ; PACKED: bb.1 (%ir-block.0): 1526 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 1527 ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 1528 ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 1529 ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 1530 ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 1531 ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 1532 ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 1533 ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 1534 ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 1535 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 1536 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 1537 ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) 1538 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1539 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) 1540 ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8") 1541 ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) 1542 ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) 1543 ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF 1544 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) 1545 ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) 1546 ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) 1547 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 1548 %res = call { <4 x 
half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
  ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
  ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x half> %tex
}

declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }