1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s
4
5define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
6  ; UNPACKED-LABEL: name: image_load_f16
7  ; UNPACKED: bb.1 (%ir-block.0):
8  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
9  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
18  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
19  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
20  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
21  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
22  ; UNPACKED:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
23  ; UNPACKED:   $vgpr0 = COPY [[ANYEXT]](s32)
24  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
25  ; PACKED-LABEL: name: image_load_f16
26  ; PACKED: bb.1 (%ir-block.0):
27  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
28  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
29  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
30  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
31  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
32  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
33  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
34  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
35  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
36  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
37  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
38  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
39  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
40  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
41  ; PACKED:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
42  ; PACKED:   $vgpr0 = COPY [[ANYEXT]](s32)
43  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
44  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
45  ret half %tex
46}
47
48define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
49  ; UNPACKED-LABEL: name: image_load_v2f16
50  ; UNPACKED: bb.1 (%ir-block.0):
51  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
52  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
53  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
54  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
55  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
56  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
57  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
58  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
59  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
60  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
61  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
62  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
63  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
64  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
65  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
66  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
67  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
68  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
69  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
70  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
71  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
72  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
73  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
74  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
75  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
76  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
77  ; PACKED-LABEL: name: image_load_v2f16
78  ; PACKED: bb.1 (%ir-block.0):
79  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
80  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
81  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
82  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
83  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
84  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
85  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
86  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
87  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
88  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
89  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
90  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
91  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
92  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
93  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
94  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
95  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
96  ret <2 x half> %tex
97}
98
99define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
100  ; UNPACKED-LABEL: name: image_load_v3f16
101  ; UNPACKED: bb.1 (%ir-block.0):
102  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
103  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
104  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
105  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
106  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
107  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
108  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
109  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
110  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
111  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
112  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
113  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
114  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
115  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
116  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
117  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
118  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
119  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
120  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
121  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
122  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
123  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
124  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
125  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
126  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
127  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
128  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
129  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
130  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
131  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
132  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
133  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
134  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
135  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
136  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
137  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
138  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
139  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
140  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
141  ; PACKED-LABEL: name: image_load_v3f16
142  ; PACKED: bb.1 (%ir-block.0):
143  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
144  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
145  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
146  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
147  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
148  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
149  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
150  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
151  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
152  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
153  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
154  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
155  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
156  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
157  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
158  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
159  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[DEF]](<2 x s16>)
160  ; PACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
161  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
162  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0
163  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
164  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
165  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
166  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
167  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
168  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
169  ret <3 x half> %tex
170}
171
172define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
173  ; UNPACKED-LABEL: name: image_load_v4f16
174  ; UNPACKED: bb.1 (%ir-block.0):
175  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
176  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
177  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
178  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
179  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
180  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
181  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
182  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
183  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
184  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
185  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
186  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
187  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
188  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
189  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
190  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
191  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
192  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
193  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
194  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
195  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
196  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
197  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
198  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
199  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
200  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
201  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
202  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
203  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
204  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
205  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
206  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
207  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
208  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
209  ; PACKED-LABEL: name: image_load_v4f16
210  ; PACKED: bb.1 (%ir-block.0):
211  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
212  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
213  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
214  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
215  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
216  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
217  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
218  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
219  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
220  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
221  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
222  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
223  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
224  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
225  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
226  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
227  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
228  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
229  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
230  ret <4 x half> %tex
231}
232
233define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
234  ; UNPACKED-LABEL: name: image_load_tfe_f16
235  ; UNPACKED: bb.1 (%ir-block.0):
236  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
237  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
238  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
239  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
240  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
241  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
242  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
243  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
244  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
245  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
246  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
247  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
248  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
249  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
250  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
251  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
252  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
253  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
254  ; UNPACKED:   $vgpr0 = COPY [[COPY10]](s32)
255  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
256  ; PACKED-LABEL: name: image_load_tfe_f16
257  ; PACKED: bb.1 (%ir-block.0):
258  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
259  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
260  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
261  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
262  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
263  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
264  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
265  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
266  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
267  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
268  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
269  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
270  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
271  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
272  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
273  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
274  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
275  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
276  ; PACKED:   $vgpr0 = COPY [[COPY10]](s32)
277  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
278  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
279  %tex = extractvalue { half, i32 } %res, 0
280  %tfe = extractvalue { half, i32 } %res, 1
281  store i32 %tfe, i32 addrspace(1)* undef
282  ret half %tex
283}
284
285define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
286  ; UNPACKED-LABEL: name: image_load_tfe_v2f16
287  ; UNPACKED: bb.1 (%ir-block.0):
288  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
289  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
290  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
291  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
292  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
293  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
294  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
295  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
296  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
297  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
298  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
299  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
300  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
301  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
302  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
303  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
304  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
305  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
306  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
307  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
308  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
309  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
310  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
311  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
312  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
313  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
314  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
315  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
316  ; PACKED-LABEL: name: image_load_tfe_v2f16
317  ; PACKED: bb.1 (%ir-block.0):
318  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
319  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
320  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
321  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
322  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
323  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
324  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
325  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
326  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
327  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
328  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
329  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
330  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
331  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
332  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
333  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
334  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
335  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
336  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
337  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
338  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
339  %tex = extractvalue { <2 x half>, i32 } %res, 0
340  %tfe = extractvalue { <2 x half>, i32 } %res, 1
341  store i32 %tfe, i32 addrspace(1)* undef
342  ret <2 x half> %tex
343}
344
345define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
346  ; UNPACKED-LABEL: name: image_load_tfe_v3f16
347  ; UNPACKED: bb.1 (%ir-block.0):
348  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
349  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
350  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
351  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
352  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
353  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
354  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
355  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
356  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
357  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
358  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
359  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
360  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
361  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
362  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
363  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
364  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
365  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
366  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
367  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
368  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
369  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
370  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
371  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
372  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
373  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
374  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
375  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
376  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
377  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
378  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
379  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
380  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
381  ; UNPACKED:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
382  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
383  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
384  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
385  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
386  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
387  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
388  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
389  ; PACKED-LABEL: name: image_load_tfe_v3f16
390  ; PACKED: bb.1 (%ir-block.0):
391  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
392  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
393  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
394  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
395  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
396  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
397  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
398  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
399  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
400  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
401  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
402  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
403  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
404  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
405  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
406  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
407  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
408  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
409  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
410  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>)
411  ; PACKED:   [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
412  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
413  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
414  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
415  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
416  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
417  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
418  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
419  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
420  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
421  %tex = extractvalue { <3 x half>, i32 } %res, 0
422  %tfe = extractvalue { <3 x half>, i32 } %res, 1
423  store i32 %tfe, i32 addrspace(1)* undef
424  ret <3 x half> %tex
425}
426
427define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
428  ; UNPACKED-LABEL: name: image_load_tfe_v4f16
429  ; UNPACKED: bb.1 (%ir-block.0):
430  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
431  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
432  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
433  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
434  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
435  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
436  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
437  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
438  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
439  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
440  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
441  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
442  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
443  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
444  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
445  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
446  ; UNPACKED:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
447  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
448  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
449  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
450  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
451  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
452  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
453  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
454  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
455  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
456  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
457  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
458  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
459  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
460  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
461  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
462  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
463  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
464  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
465  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
466  ; PACKED-LABEL: name: image_load_tfe_v4f16
467  ; PACKED: bb.1 (%ir-block.0):
468  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
469  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
470  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
471  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
472  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
473  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
474  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
475  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
476  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
477  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
478  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
479  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
480  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
481  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
482  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
483  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
484  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
485  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
486  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
487  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
488  ; PACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
489  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
490  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
491  %tex = extractvalue { <4 x half>, i32 } %res, 0
492  %tfe = extractvalue { <4 x half>, i32 } %res, 1
493  store i32 %tfe, i32 addrspace(1)* undef
494  ret <4 x half> %tex
495}
496
497define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
498  ; UNPACKED-LABEL: name: image_load_f16_dmask_0000
499  ; UNPACKED: bb.1 (%ir-block.0):
500  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
501  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
502  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
503  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
504  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
505  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
506  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
507  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
508  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
509  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
510  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
511  ; UNPACKED:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
512  ; UNPACKED:   $vgpr0 = COPY [[DEF]](s32)
513  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
514  ; PACKED-LABEL: name: image_load_f16_dmask_0000
515  ; PACKED: bb.1 (%ir-block.0):
516  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
517  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
518  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
519  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
520  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
521  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
522  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
523  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
524  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
525  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
526  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
527  ; PACKED:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
528  ; PACKED:   $vgpr0 = COPY [[DEF]](s32)
529  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
530  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
531  ret half %tex
532}
533
534define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
535  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000
536  ; UNPACKED: bb.1 (%ir-block.0):
537  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
538  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
539  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
540  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
541  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
542  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
543  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
544  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
545  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
546  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
547  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
548  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
549  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
550  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
551  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
552  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
553  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
554  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
555  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
556  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
557  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
558  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
559  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
560  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
561  ; PACKED-LABEL: name: image_load_v2f16_dmask_1000
562  ; PACKED: bb.1 (%ir-block.0):
563  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
564  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
565  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
566  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
567  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
568  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
569  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
570  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
571  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
572  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
573  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
574  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
575  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
576  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
577  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
578  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
579  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
580  ret <2 x half> %tex
581}
582
583define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
584  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000
585  ; UNPACKED: bb.1 (%ir-block.0):
586  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
587  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
588  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
589  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
590  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
591  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
592  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
593  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
594  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
595  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
596  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
597  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
598  ; UNPACKED:   $vgpr0 = COPY [[DEF]](<2 x s16>)
599  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
600  ; PACKED-LABEL: name: image_load_v2f16_dmask_0000
601  ; PACKED: bb.1 (%ir-block.0):
602  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
603  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
604  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
605  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
606  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
607  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
608  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
609  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
610  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
611  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
612  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
613  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
614  ; PACKED:   $vgpr0 = COPY [[DEF]](<2 x s16>)
615  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
616  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
617  ret <2 x half> %tex
618}
619
620define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
621  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100
622  ; UNPACKED: bb.1 (%ir-block.0):
623  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
624  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
625  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
626  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
627  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
628  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
629  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
630  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
631  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
632  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
633  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
634  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
635  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
636  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
637  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
638  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
639  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
640  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
641  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
642  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
643  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
644  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
645  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
646  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
647  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
648  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
649  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
650  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
651  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
652  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
653  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
654  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
655  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
656  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
657  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
658  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
659  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
660  ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
661  ; PACKED: bb.1 (%ir-block.0):
662  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
663  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
664  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
665  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
666  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
667  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
668  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
669  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
670  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
671  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
672  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
673  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
674  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
675  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
676  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
677  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
678  ; PACKED:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
679  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
680  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
681  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
682  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
683  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
684  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
685  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
686  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
687  ret <3 x half> %tex
688}
689
690define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
691  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000
692  ; UNPACKED: bb.1 (%ir-block.0):
693  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
694  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
695  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
696  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
697  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
698  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
699  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
700  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
701  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
702  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
703  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
704  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
705  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
706  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
707  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
708  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
709  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
710  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
711  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
712  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
713  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
714  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
715  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
716  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
717  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
718  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
719  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
720  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
721  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
722  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
723  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
724  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
725  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
726  ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
727  ; PACKED: bb.1 (%ir-block.0):
728  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
729  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
730  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
731  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
732  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
733  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
734  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
735  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
736  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
737  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
738  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
739  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
740  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
741  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
742  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
743  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
744  ; PACKED:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
745  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
746  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
747  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
748  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
749  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
750  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
751  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
752  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
753  ret <3 x half> %tex
754}
755
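; With dmask 0 no channels are enabled, so no image load is emitted at all; the <3 x half> result is assembled from G_IMPLICIT_DEF on both subtargets.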
756define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
757  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000
758  ; UNPACKED: bb.1 (%ir-block.0):
759  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
760  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
761  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
762  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
763  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
764  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
765  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
766  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
767  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
768  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
769  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
770  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
771  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
772  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
773  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
774  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
775  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
776  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
777  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
778  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
779  ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
780  ; PACKED: bb.1 (%ir-block.0):
781  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
782  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
783  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
784  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
785  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
786  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
787  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
788  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
789  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
790  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
791  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
792  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
793  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
794  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
795  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
796  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
797  ; PACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
798  ; PACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
799  ; PACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
800  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
801  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
802  ret <3 x half> %tex
803}
804
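; dmask 0b0111: only three channels are read (load 6, align 8). UNPACKED repacks three result dwords into <2 x s16> halves; PACKED loads <4 x s16> directly, with the fourth component not read from memory.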
805define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
806  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110
807  ; UNPACKED: bb.1 (%ir-block.0):
808  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
809  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
810  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
811  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
812  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
813  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
814  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
815  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
816  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
817  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
818  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
819  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
820  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
821  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
822  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
823  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
824  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
825  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
826  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
827  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
828  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
829  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
830  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
831  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
832  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
833  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
834  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
835  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
836  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
837  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
838  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
839  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
840  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
841  ; PACKED-LABEL: name: image_load_v4f16_dmask_1110
842  ; PACKED: bb.1 (%ir-block.0):
843  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
844  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
845  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
846  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
847  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
848  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
849  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
850  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
851  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
852  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
853  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
854  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
855  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
856  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
857  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
858  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
859  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
860  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
861  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
862  ret <4 x half> %tex
863}
864
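; dmask 0b0011: two channels are read (load 4). PACKED returns the loaded <2 x s16> in $vgpr0 and an implicit_def in $vgpr1; UNPACKED packs the two result dwords into $vgpr0 and zero-fills $vgpr1.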
865define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
866  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
867  ; UNPACKED: bb.1 (%ir-block.0):
868  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
869  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
870  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
871  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
872  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
873  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
874  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
875  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
876  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
877  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
878  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
879  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
880  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
881  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
882  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
883  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
884  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
885  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
886  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
887  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
888  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
889  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
890  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
891  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
892  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
893  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
894  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
895  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
896  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
897  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
898  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
899  ; PACKED-LABEL: name: image_load_v4f16_dmask_1100
900  ; PACKED: bb.1 (%ir-block.0):
901  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
902  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
903  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
904  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
905  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
906  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
907  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
908  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
909  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
910  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
911  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
912  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
913  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
914  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
915  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
916  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
917  ; PACKED:   $vgpr1 = COPY [[DEF]](<2 x s16>)
918  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
919  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
920  ret <4 x half> %tex
921}
922
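; dmask 0b0001: a single channel is read (load 2). PACKED returns the loaded <2 x s16> plus an implicit_def; UNPACKED masks the low 16 bits of the returned dword and zero-fills the remaining lanes.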
923define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
924  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000
925  ; UNPACKED: bb.1 (%ir-block.0):
926  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
927  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
928  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
929  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
930  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
931  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
932  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
933  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
934  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
935  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
936  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
937  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
938  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
939  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
940  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
941  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
942  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
943  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
944  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
945  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
946  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
947  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
948  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
949  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
950  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
951  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
952  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
953  ; PACKED-LABEL: name: image_load_v4f16_dmask_1000
954  ; PACKED: bb.1 (%ir-block.0):
955  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
956  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
957  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
958  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
959  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
960  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
961  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
962  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
963  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
964  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
965  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
966  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
967  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
968  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
969  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
970  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
971  ; PACKED:   $vgpr1 = COPY [[DEF]](<2 x s16>)
972  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
973  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
974  ret <4 x half> %tex
975}
976
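; dmask 0: no image load is emitted; the entire <4 x half> result is an implicit_def on both subtargets.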
977define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
978  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000
979  ; UNPACKED: bb.1 (%ir-block.0):
980  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
981  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
982  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
983  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
984  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
985  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
986  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
987  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
988  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
989  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
990  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
991  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
992  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
993  ; UNPACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
994  ; UNPACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
995  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
996  ; PACKED-LABEL: name: image_load_v4f16_dmask_0000
997  ; PACKED: bb.1 (%ir-block.0):
998  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
999  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1000  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1001  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1002  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1003  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1004  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1005  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1006  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1007  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1008  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1009  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1010  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
1011  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
1012  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
1013  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1014  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1015  ret <4 x half> %tex
1016}
1017
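; TFE with dmask 0: the legalized load still enables one channel (dmask 1) and returns <2 x s32> (data dword plus TFE status dword); the status is split off and stored to the undef pointer.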
1018define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1019  ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000
1020  ; UNPACKED: bb.1 (%ir-block.0):
1021  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1022  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1023  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1024  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1025  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1026  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1027  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1028  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1029  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1030  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1031  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1032  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1033  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1034  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1035  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1036  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1037  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1038  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1039  ; UNPACKED:   $vgpr0 = COPY [[COPY10]](s32)
1040  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1041  ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000
1042  ; PACKED: bb.1 (%ir-block.0):
1043  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1044  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1045  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1046  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1047  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1048  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1049  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1050  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1051  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1052  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1053  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1054  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1055  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1056  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1057  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1058  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1059  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1060  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1061  ; PACKED:   $vgpr0 = COPY [[COPY10]](s32)
1062  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1063  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1064  %tex = extractvalue { half, i32 } %res, 0
1065  %tfe = extractvalue { half, i32 } %res, 1
1066  store i32 %tfe, i32 addrspace(1)* undef
1067  ret half %tex
1068}
1069
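; TFE + dmask 0b0001: the load returns one data dword plus the TFE status dword. PACKED bitcasts the data dword straight to <2 x s16>; UNPACKED masks it to the low 16 bits first. The status dword is stored.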
1070define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1071  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1072  ; UNPACKED: bb.1 (%ir-block.0):
1073  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1074  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1075  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1076  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1077  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1078  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1079  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1080  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1081  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1082  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1083  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1084  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1085  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1086  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1087  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1088  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1089  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1090  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1091  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1092  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1093  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1094  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1095  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1096  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1097  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1098  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1099  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1100  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1101  ; PACKED: bb.1 (%ir-block.0):
1102  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1103  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1104  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1105  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1106  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1107  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1108  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1109  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1110  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1111  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1112  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1113  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1114  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1115  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1116  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1117  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1118  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1119  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1120  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1121  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1122  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1123  %tex = extractvalue { <2 x half>, i32 } %res, 0
1124  %tfe = extractvalue { <2 x half>, i32 } %res, 1
1125  store i32 %tfe, i32 addrspace(1)* undef
1126  ret <2 x half> %tex
1127}
1128
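; TFE with dmask 0: legalized the same as the dmask 0b0001 case above, with one channel kept enabled (dmask 1) so the TFE status dword is still returned and stored.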
1129define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1130  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1131  ; UNPACKED: bb.1 (%ir-block.0):
1132  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1133  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1134  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1135  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1136  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1137  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1138  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1139  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1140  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1141  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1142  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1143  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1144  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1145  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1146  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1147  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1148  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1149  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1150  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1151  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1152  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1153  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1154  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1155  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1156  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1157  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1158  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1159  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1160  ; PACKED: bb.1 (%ir-block.0):
1161  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1162  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1163  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1164  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1165  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1166  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1167  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1168  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1169  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1170  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1171  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1172  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1173  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1174  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1175  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1176  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1177  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1178  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1179  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1180  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1181  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1182  %tex = extractvalue { <2 x half>, i32 } %res, 0
1183  %tfe = extractvalue { <2 x half>, i32 } %res, 1
1184  store i32 %tfe, i32 addrspace(1)* undef
1185  ret <2 x half> %tex
1186}
1187
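; TFE + dmask 0b0011 for v3f16 (load 4): UNPACKED gets <3 x s32> (two data dwords plus status), PACKED gets <2 x s32> (one packed data dword plus status). The status dword is stored and the data is padded out to the two-VGPR <3 x half> return.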
1188define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1189  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1190  ; UNPACKED: bb.1 (%ir-block.0):
1191  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1192  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1193  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1194  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1195  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1196  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1197  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1198  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1199  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1200  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1201  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1202  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1203  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1204  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1205  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1206  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1207  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1208  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1209  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1210  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1211  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
1212  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1213  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1214  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1215  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1216  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1217  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1218  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1219  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1220  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
1221  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
1222  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1223  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1224  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
1225  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
1226  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
1227  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
1228  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
1229  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1230  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1231  ; PACKED: bb.1 (%ir-block.0):
1232  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1233  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1234  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1235  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1236  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1237  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1238  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1239  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1240  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1241  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1242  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1243  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1244  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1245  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1246  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1247  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1248  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1249  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1250  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
1251  ; PACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
1252  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1253  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1254  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
1255  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
1256  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
1257  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
1258  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
1259  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1260  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1261  %tex = extractvalue { <3 x half>, i32 } %res, 0
1262  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1263  store i32 %tfe, i32 addrspace(1)* undef
1264  ret <3 x half> %tex
1265}
1266
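; TFE + dmask 0b0001 for v3f16 (load 2): a single data dword plus the TFE status dword; the components that are not loaded are padding in the returned registers.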
1267define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1268  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1269  ; UNPACKED: bb.1 (%ir-block.0):
1270  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1271  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1272  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1273  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1274  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1275  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1276  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1277  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1278  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1279  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1280  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1281  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1282  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1283  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1284  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1285  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1286  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1287  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1288  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1289  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1290  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1291  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1292  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1293  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1294  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1295  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1296  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
1297  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
1298  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1299  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1300  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
1301  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
1302  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
1303  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
1304  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
1305  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1306  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1307  ; PACKED: bb.1 (%ir-block.0):
1308  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1309  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1310  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1311  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1312  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1313  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1314  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1315  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1316  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1317  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1318  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1319  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1320  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1321  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1322  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1323  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1324  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1325  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1326  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
1327  ; PACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
1328  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1329  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1330  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
1331  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
1332  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
1333  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
1334  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
1335  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1336  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1337  %tex = extractvalue { <3 x half>, i32 } %res, 0
1338  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1339  store i32 %tfe, i32 addrspace(1)* undef
1340  ret <3 x half> %tex
1341}
1342
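; TFE with dmask 0 for v3f16: the load is kept with dmask 1 so the TFE status dword is still produced and stored.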
1343define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1344  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1345  ; UNPACKED: bb.1 (%ir-block.0):
1346  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1347  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1348  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1349  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1350  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1351  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1352  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1353  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1354  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1355  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1356  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1357  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1358  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1359  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1360  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1361  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1362  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1363  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1364  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1365  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1366  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1367  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1368  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1369  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1370  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1371  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1372  ; UNPACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
1373  ; UNPACKED:   [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
1374  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1375  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1376  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
1377  ; UNPACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
1378  ; UNPACKED:   [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
1379  ; UNPACKED:   $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
1380  ; UNPACKED:   $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
1381  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1382  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1383  ; PACKED: bb.1 (%ir-block.0):
1384  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1385  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1386  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1387  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1388  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1389  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1390  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1391  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1392  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1393  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1394  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1395  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1396  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1397  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1398  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1399  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1400  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1401  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1402  ; PACKED:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
1403  ; PACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
1404  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1405  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1406  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
1407  ; PACKED:   [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
1408  ; PACKED:   [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
1409  ; PACKED:   $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
1410  ; PACKED:   $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
1411  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1412  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1413  %tex = extractvalue { <3 x half>, i32 } %res, 0
1414  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1415  store i32 %tfe, i32 addrspace(1)* undef
1416  ret <3 x half> %tex
1417}
1418
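; TFE + dmask 0b0111 for v4f16 (load 6, align 8): UNPACKED gets <4 x s32> (three data dwords plus status), PACKED gets <3 x s32> (two packed data dwords plus status). The status dword is stored to the undef pointer.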
1419define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1420  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1421  ; UNPACKED: bb.1 (%ir-block.0):
1422  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1423  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1424  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1425  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1426  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1427  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1428  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1429  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1430  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1431  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1432  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1433  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1434  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1435  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1436  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
1437  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
1438  ; UNPACKED:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1439  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1440  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1441  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1442  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1443  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
1444  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1445  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1446  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1447  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1448  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
1449  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
1450  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1451  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1452  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1453  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1454  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1455  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1456  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1457  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1458  ; PACKED: bb.1 (%ir-block.0):
1459  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1460  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1461  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1462  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1463  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1464  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1465  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1466  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1467  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1468  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1469  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1470  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1471  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1472  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1473  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
1474  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1475  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1476  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
1477  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1478  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1479  ; PACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1480  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1481  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1482  %tex = extractvalue { <4 x half>, i32 } %res, 0
1483  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1484  store i32 %tfe, i32 addrspace(1)* undef
1485  ret <4 x half> %tex
1486}
1487
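; TFE + dmask 0b0011 for v4f16 (load 4): UNPACKED gets <3 x s32> and PACKED gets <2 x s32>; in both, the last dword is the TFE status, which is stored, and the components beyond the first two are not read from memory.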
1488define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1489  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1490  ; UNPACKED: bb.1 (%ir-block.0):
1491  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1492  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1493  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1494  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1495  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1496  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1497  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1498  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1499  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1500  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1501  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1502  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1503  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1504  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1505  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1506  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1507  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1508  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1509  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1510  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1511  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
1512  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
1513  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1514  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1515  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1516  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1517  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1518  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1519  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1520  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1521  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1522  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1523  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1524  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1525  ; PACKED: bb.1 (%ir-block.0):
1526  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1527  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1528  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1529  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1530  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1531  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1532  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1533  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1534  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1535  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1536  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1537  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1538  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1539  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1540  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
1541  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1542  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1543  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1544  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1545  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1546  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1547  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1548  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1549  %tex = extractvalue { <4 x half>, i32 } %res, 0
1550  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1551  store i32 %tfe, i32 addrspace(1)* undef
1552  ret <4 x half> %tex
1553}
1554
1555define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1556  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1557  ; UNPACKED: bb.1 (%ir-block.0):
1558  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1559  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1560  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1561  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1562  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1563  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1564  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1565  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1566  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1567  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1568  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1569  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1570  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1571  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1572  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1573  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1574  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1575  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1576  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1577  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1578  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1579  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1580  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1581  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1582  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1583  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1584  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1585  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1586  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1587  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1588  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1589  ; PACKED: bb.1 (%ir-block.0):
1590  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1591  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1592  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1593  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1594  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1595  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1596  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1597  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1598  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1599  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1600  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1601  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1602  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1603  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1604  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1605  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1606  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1607  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1608  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1609  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1610  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1611  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1612  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1613  %tex = extractvalue { <4 x half>, i32 } %res, 0
1614  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1615  store i32 %tfe, i32 addrspace(1)* undef
1616  ret <4 x half> %tex
1617}
1618
1619define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1620  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
1621  ; UNPACKED: bb.1 (%ir-block.0):
1622  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1623  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1624  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1625  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1626  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1627  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1628  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1629  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1630  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1631  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1632  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1633  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1634  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1635  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1636  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1637  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1638  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1639  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1640  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
1641  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
1642  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1643  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1644  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1645  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1646  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1647  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1648  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1649  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1650  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1651  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1652  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
1653  ; PACKED: bb.1 (%ir-block.0):
1654  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1655  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1656  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1657  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1658  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1659  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1660  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1661  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1662  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1663  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1664  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1665  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1666  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1667  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1668  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
1669  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1670  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1671  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1672  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1673  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1674  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1675  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1676  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1677  %tex = extractvalue { <4 x half>, i32 } %res, 0
1678  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1679  store i32 %tfe, i32 addrspace(1)* undef
1680  ret <4 x half> %tex
1681}
1682
1683declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1684declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1685declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1686declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1687declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1688declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1689declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1690declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1691
1692attributes #0 = { nounwind readonly }
1693