; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=PACKED %s

define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
  ; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
  ; PACKED: $vgpr0 = COPY [[ANYEXT]](s32)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v2f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_v2f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
  ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v3f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
  ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v3f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
  ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
  ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
  ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v4f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
  ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; UNPACKED: $vgpr0 = COPY [[UV]](s32)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_tfe_f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; PACKED: $vgpr0 = COPY [[UV]](s32)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { half, i32 } %res, 0
  %tfe = extractvalue { half, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v2f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_tfe_v2f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
  ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x half>, i32 } %res, 0
  %tfe = extractvalue { <2 x half>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <2 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v3f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
  ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_tfe_v3f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
  ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
  ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
  ; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
  ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
  ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x half>, i32 } %res, 0
  %tfe = extractvalue { <3 x half>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <3 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v4f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_tfe_v4f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
  ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
  ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x half> %tex
}

define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; UNPACKED: $vgpr0 = COPY [[DEF]](s32)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; PACKED: $vgpr0 = COPY [[DEF]](s32)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_v2f16_dmask_1000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}

define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: $vgpr0 = COPY [[DEF]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_v2f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED: $vgpr0 = COPY [[DEF]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
  ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
  ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
  ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]]
  ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
  ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
  ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
  ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
  ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
  ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
  ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
  ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
  ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
  ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
  ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
  ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v4f16_dmask_1110
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
  ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v4f16_dmask_1100
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %tex
}

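; dmask 1000: a single component is loaded and the remaining elements of the result are zero/undef padding.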
define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v4f16_dmask_1000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %tex
}

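; dmask 0000: no components are requested, so no image load is emitted at all and the whole result is undef.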
define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; UNPACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v4f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %tex
}

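; With TFE the load cannot be dropped even for an empty dmask: the dmask is raised to 1, and the extra status dword returned by the instruction is stored to the undef pointer.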
define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: $vgpr0 = COPY [[UV]](s32)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[UV]](s32)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { half, i32 } %res, 0
%tfe = extractvalue { half, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret half %tex
}

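; TFE <2 x half> loads: one data dword plus the status dword; UNPACKED re-packs the low 16 bits, PACKED just bitcasts the data dword.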
define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <2 x half>, i32 } %res, 0
%tfe = extractvalue { <2 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <2 x half> %tex
}

define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <2 x half>, i32 } %res, 0
%tfe = extractvalue { <2 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <2 x half> %tex
}

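; TFE <3 x half> loads: the enabled data dwords plus the status dword; the missing high components are filled from implicit_def before repacking.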
define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
%tfe = extractvalue { <3 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <3 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
%tfe = extractvalue { <3 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <3 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
%tfe = extractvalue { <3 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <3 x half> %tex
}

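; TFE <4 x half> loads: dmask 1110 gives three data dwords (UNPACKED) or two packed registers (PACKED) plus the status dword; smaller dmasks shrink the load accordingly.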
define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <4 x half>, i32 } %res, 0
%tfe = extractvalue { <4 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <4 x half>, i32 } %res, 0
%tfe = extractvalue { <4 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <4 x half>, i32 } %res, 0
%tfe = extractvalue { <4 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <4 x half>, i32 } %res, 0
%tfe = extractvalue { <4 x half>, i32 } %res, 1
store i32 %tfe, i32 addrspace(1)* undef
ret <4 x half> %tex
}

declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }