1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=PACKED %s
4
5define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
6  ; UNPACKED-LABEL: name: image_load_f16
7  ; UNPACKED: bb.1 (%ir-block.0):
8  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
9  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
18  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
19  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
20  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
21  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
22  ; UNPACKED:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
23  ; UNPACKED:   $vgpr0 = COPY [[ANYEXT]](s32)
24  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
25  ; PACKED-LABEL: name: image_load_f16
26  ; PACKED: bb.1 (%ir-block.0):
27  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
28  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
29  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
30  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
31  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
32  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
33  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
34  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
35  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
36  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
37  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
38  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
39  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
40  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
41  ; PACKED:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
42  ; PACKED:   $vgpr0 = COPY [[ANYEXT]](s32)
43  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
44  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
45  ret half %tex
46}
47
48define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
49  ; UNPACKED-LABEL: name: image_load_v2f16
50  ; UNPACKED: bb.1 (%ir-block.0):
51  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
52  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
53  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
54  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
55  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
56  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
57  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
58  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
59  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
60  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
61  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
62  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
63  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
64  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
65  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
66  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
67  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
68  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
69  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
70  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
71  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
72  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
73  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
74  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
75  ; PACKED-LABEL: name: image_load_v2f16
76  ; PACKED: bb.1 (%ir-block.0):
77  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
78  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
79  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
80  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
81  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
82  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
83  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
84  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
85  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
86  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
87  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
88  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
89  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
90  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
91  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
92  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
93  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
94  ret <2 x half> %tex
95}
96
97define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
98  ; UNPACKED-LABEL: name: image_load_v3f16
99  ; UNPACKED: bb.1 (%ir-block.0):
100  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
101  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
102  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
103  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
104  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
105  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
106  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
107  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
108  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
109  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
110  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
111  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
112  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
113  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
114  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
115  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
116  ; UNPACKED:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
117  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
118  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
119  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
120  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
121  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
122  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
123  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
124  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
125  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
126  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
127  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
128  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
129  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
130  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
131  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
132  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
133  ; PACKED-LABEL: name: image_load_v3f16
134  ; PACKED: bb.1 (%ir-block.0):
135  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
136  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
137  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
138  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
139  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
140  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
141  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
142  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
143  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
144  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
145  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
146  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
147  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
148  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
149  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
150  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
151  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
152  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
153  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
154  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
155  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
156  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
157  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
158  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
159  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
160  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
161  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
162  ; PACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
163  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
164  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
165  ret <3 x half> %tex
166}
167
168define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
169  ; UNPACKED-LABEL: name: image_load_v4f16
170  ; UNPACKED: bb.1 (%ir-block.0):
171  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
172  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
173  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
174  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
175  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
176  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
177  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
178  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
179  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
180  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
181  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
182  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
183  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
184  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
185  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
186  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
187  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
188  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
189  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
190  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
191  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
192  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
193  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
194  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
195  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
196  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
197  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
198  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
199  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
200  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
201  ; PACKED-LABEL: name: image_load_v4f16
202  ; PACKED: bb.1 (%ir-block.0):
203  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
204  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
205  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
206  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
207  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
208  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
209  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
210  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
211  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
212  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
213  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
214  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
215  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
216  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
217  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
218  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
219  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
220  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
221  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
222  ret <4 x half> %tex
223}
224
225define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
226  ; UNPACKED-LABEL: name: image_load_tfe_f16
227  ; UNPACKED: bb.1 (%ir-block.0):
228  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
229  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
230  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
231  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
232  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
233  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
234  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
235  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
236  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
237  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
238  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
239  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
240  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
241  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
242  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
243  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
244  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
245  ; UNPACKED:   $vgpr0 = COPY [[UV]](s32)
246  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
247  ; PACKED-LABEL: name: image_load_tfe_f16
248  ; PACKED: bb.1 (%ir-block.0):
249  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
250  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
251  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
252  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
253  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
254  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
255  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
256  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
257  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
258  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
259  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
260  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
261  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
262  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
263  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
264  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
265  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
266  ; PACKED:   $vgpr0 = COPY [[UV]](s32)
267  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
268  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
269  %tex = extractvalue { half, i32 } %res, 0
270  %tfe = extractvalue { half, i32 } %res, 1
271  store i32 %tfe, i32 addrspace(1)* undef
272  ret half %tex
273}
274
275define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
276  ; UNPACKED-LABEL: name: image_load_tfe_v2f16
277  ; UNPACKED: bb.1 (%ir-block.0):
278  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
279  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
280  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
281  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
282  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
283  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
284  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
285  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
286  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
287  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
288  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
289  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
290  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
291  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
292  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
293  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
294  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
295  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
296  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
297  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
298  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
299  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
300  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
301  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
302  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
303  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
304  ; PACKED-LABEL: name: image_load_tfe_v2f16
305  ; PACKED: bb.1 (%ir-block.0):
306  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
307  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
308  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
309  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
310  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
311  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
312  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
313  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
314  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
315  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
316  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
317  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
318  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
319  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
320  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
321  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
322  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
323  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
324  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
325  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
326  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
327  %tex = extractvalue { <2 x half>, i32 } %res, 0
328  %tfe = extractvalue { <2 x half>, i32 } %res, 1
329  store i32 %tfe, i32 addrspace(1)* undef
330  ret <2 x half> %tex
331}
332
333define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
334  ; UNPACKED-LABEL: name: image_load_tfe_v3f16
335  ; UNPACKED: bb.1 (%ir-block.0):
336  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
337  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
338  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
339  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
340  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
341  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
342  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
343  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
344  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
345  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
346  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
347  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
348  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
349  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
350  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
351  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
352  ; UNPACKED:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
353  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
354  ; UNPACKED:   [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
355  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
356  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
357  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
358  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
359  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
360  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
361  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
362  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
363  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
364  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
365  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
366  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
367  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
368  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
369  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
370  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
371  ; PACKED-LABEL: name: image_load_tfe_v3f16
372  ; PACKED: bb.1 (%ir-block.0):
373  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
374  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
375  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
376  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
377  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
378  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
379  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
380  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
381  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
382  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
383  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
384  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
385  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
386  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
387  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
388  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
389  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
390  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
391  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
392  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
393  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
394  ; PACKED:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
395  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
396  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
397  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
398  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
399  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
400  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
401  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
402  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
403  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
404  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
405  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
406  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
407  %tex = extractvalue { <3 x half>, i32 } %res, 0
408  %tfe = extractvalue { <3 x half>, i32 } %res, 1
409  store i32 %tfe, i32 addrspace(1)* undef
410  ret <3 x half> %tex
411}
412
413define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
414  ; UNPACKED-LABEL: name: image_load_tfe_v4f16
415  ; UNPACKED: bb.1 (%ir-block.0):
416  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
417  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
418  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
419  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
420  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
421  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
422  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
423  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
424  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
425  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
426  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
427  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
428  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
429  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
430  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
431  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
432  ; UNPACKED:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
433  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
434  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
435  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
436  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
437  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
438  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
439  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
440  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
441  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
442  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
443  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
444  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
445  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
446  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
447  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
448  ; PACKED-LABEL: name: image_load_tfe_v4f16
449  ; PACKED: bb.1 (%ir-block.0):
450  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
451  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
452  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
453  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
454  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
455  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
456  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
457  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
458  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
459  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
460  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
461  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
462  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
463  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
464  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource")
465  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
466  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
467  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
468  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
469  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
470  ; PACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
471  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
472  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
473  %tex = extractvalue { <4 x half>, i32 } %res, 0
474  %tfe = extractvalue { <4 x half>, i32 } %res, 1
475  store i32 %tfe, i32 addrspace(1)* undef
476  ret <4 x half> %tex
477}
478
479define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
480  ; UNPACKED-LABEL: name: image_load_f16_dmask_0000
481  ; UNPACKED: bb.1 (%ir-block.0):
482  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
483  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
484  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
485  ; UNPACKED:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
486  ; UNPACKED:   $vgpr0 = COPY [[DEF]](s32)
487  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
488  ; PACKED-LABEL: name: image_load_f16_dmask_0000
489  ; PACKED: bb.1 (%ir-block.0):
490  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
491  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
492  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
493  ; PACKED:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
494  ; PACKED:   $vgpr0 = COPY [[DEF]](s32)
495  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
496  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
497  ret half %tex
498}
499
500define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
501  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000
502  ; UNPACKED: bb.1 (%ir-block.0):
503  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
504  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
505  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
506  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
507  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
508  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
509  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
510  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
511  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
512  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
513  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
514  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
515  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
516  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
517  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
518  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]]
519  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
520  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
521  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
522  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
523  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
524  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
525  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
526  ; PACKED-LABEL: name: image_load_v2f16_dmask_1000
527  ; PACKED: bb.1 (%ir-block.0):
528  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
529  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
530  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
531  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
532  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
533  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
534  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
535  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
536  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
537  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
538  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
539  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
540  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
541  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
542  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
543  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
544  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
545  ret <2 x half> %tex
546}
547
548define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
549  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000
550  ; UNPACKED: bb.1 (%ir-block.0):
551  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
552  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
553  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
554  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
555  ; UNPACKED:   $vgpr0 = COPY [[DEF]](<2 x s16>)
556  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
557  ; PACKED-LABEL: name: image_load_v2f16_dmask_0000
558  ; PACKED: bb.1 (%ir-block.0):
559  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
560  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
561  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
562  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
563  ; PACKED:   $vgpr0 = COPY [[DEF]](<2 x s16>)
564  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
565  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
566  ret <2 x half> %tex
567}
568
569define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
570  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100
571  ; UNPACKED: bb.1 (%ir-block.0):
572  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
573  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
574  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
575  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
576  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
577  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
578  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
579  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
580  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
581  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
582  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
583  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
584  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
585  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
586  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
587  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
588  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
589  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
590  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
591  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
592  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
593  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
594  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
595  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
596  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
597  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
598  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
599  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
600  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
601  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
602  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
603  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
604  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
605  ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
606  ; PACKED: bb.1 (%ir-block.0):
607  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
608  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
609  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
610  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
611  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
612  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
613  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
614  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
615  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
616  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
617  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
618  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
619  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
620  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
621  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
622  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
623  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
624  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
625  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
626  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
627  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
628  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
629  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
630  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
631  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
632  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
633  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
634  ; PACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
635  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
636  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
637  ret <3 x half> %tex
638}
639
640define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
641  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000
642  ; UNPACKED: bb.1 (%ir-block.0):
643  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
644  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
645  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
646  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
647  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
648  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
649  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
650  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
651  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
652  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
653  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
654  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
655  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
656  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
657  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
658  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
659  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
660  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
661  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
662  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]]
663  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
664  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
665  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
666  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
667  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
668  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
669  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
670  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
671  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
672  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
673  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
674  ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
675  ; PACKED: bb.1 (%ir-block.0):
676  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
677  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
678  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
679  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
680  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
681  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
682  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
683  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
684  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
685  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
686  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
687  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
688  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
689  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
690  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
691  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
692  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
693  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
694  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
695  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
696  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
697  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
698  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
699  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
700  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
701  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
702  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
703  ; PACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
704  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
705  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
706  ret <3 x half> %tex
707}
708
709define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
710  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000
711  ; UNPACKED: bb.1 (%ir-block.0):
712  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
713  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
714  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
715  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
716  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
717  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
718  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
719  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
720  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
721  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
722  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
723  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
724  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
725  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
726  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
727  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
728  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
729  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
730  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
731  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
732  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
733  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
734  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
735  ; UNPACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
736  ; UNPACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
737  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
738  ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
739  ; PACKED: bb.1 (%ir-block.0):
740  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
741  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
742  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
743  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
744  ; PACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
745  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
746  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
747  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
748  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
749  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
750  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
751  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
752  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
753  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
754  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
755  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
756  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
757  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
758  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
759  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
760  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
761  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
762  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
763  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
764  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
765  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
766  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
767  ret <3 x half> %tex
768}
769
770define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
771  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110
772  ; UNPACKED: bb.1 (%ir-block.0):
773  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
774  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
775  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
776  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
777  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
778  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
779  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
780  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
781  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
782  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
783  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
784  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
785  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
786  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
787  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
788  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
789  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
790  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
791  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
792  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
793  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
794  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
795  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
796  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
797  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
798  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
799  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
800  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
801  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
802  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
803  ; PACKED-LABEL: name: image_load_v4f16_dmask_1110
804  ; PACKED: bb.1 (%ir-block.0):
805  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
806  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
807  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
808  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
809  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
810  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
811  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
812  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
813  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
814  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
815  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
816  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
817  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
818  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
819  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
820  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
821  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
822  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
823  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
824  ret <4 x half> %tex
825}
826
827define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
828  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
829  ; UNPACKED: bb.1 (%ir-block.0):
830  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
831  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
832  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
833  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
834  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
835  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
836  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
837  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
838  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
839  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
840  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
841  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
842  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
843  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
844  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
845  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
846  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
847  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
848  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
849  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
850  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
851  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
852  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
853  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
854  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
855  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
856  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
857  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
858  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
859  ; PACKED-LABEL: name: image_load_v4f16_dmask_1100
860  ; PACKED: bb.1 (%ir-block.0):
861  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
862  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
863  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
864  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
865  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
866  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
867  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
868  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
869  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
870  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
871  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
872  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
873  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
874  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
875  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
876  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
877  ; PACKED:   $vgpr1 = COPY [[DEF]](<2 x s16>)
878  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
879  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
880  ret <4 x half> %tex
881}
882
883define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
884  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000
885  ; UNPACKED: bb.1 (%ir-block.0):
886  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
887  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
888  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
889  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
890  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
891  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
892  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
893  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
894  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
895  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
896  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
897  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
898  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
899  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
900  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
901  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]]
902  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
903  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
904  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
905  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
906  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
907  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
908  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
909  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
910  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
911  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
912  ; PACKED-LABEL: name: image_load_v4f16_dmask_1000
913  ; PACKED: bb.1 (%ir-block.0):
914  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
915  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
916  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
917  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
918  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
919  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
920  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
921  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
922  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
923  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
924  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
925  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
926  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
927  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
928  ; PACKED:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
929  ; PACKED:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
930  ; PACKED:   $vgpr1 = COPY [[DEF]](<2 x s16>)
931  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
932  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
933  ret <4 x half> %tex
934}
935
936define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
937  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000
938  ; UNPACKED: bb.1 (%ir-block.0):
939  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
940  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
941  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
942  ; UNPACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
943  ; UNPACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
944  ; UNPACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
945  ; UNPACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
946  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
947  ; PACKED-LABEL: name: image_load_v4f16_dmask_0000
948  ; PACKED: bb.1 (%ir-block.0):
949  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
950  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
951  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
952  ; PACKED:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
953  ; PACKED:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
954  ; PACKED:   $vgpr0 = COPY [[UV]](<2 x s16>)
955  ; PACKED:   $vgpr1 = COPY [[UV1]](<2 x s16>)
956  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
957  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
958  ret <4 x half> %tex
959}
960
961define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
962  ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000
963  ; UNPACKED: bb.1 (%ir-block.0):
964  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
965  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
966  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
967  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
968  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
969  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
970  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
971  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
972  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
973  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
974  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
975  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
976  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
977  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
978  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
979  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
980  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
981  ; UNPACKED:   $vgpr0 = COPY [[UV]](s32)
982  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
983  ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000
984  ; PACKED: bb.1 (%ir-block.0):
985  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
986  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
987  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
988  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
989  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
990  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
991  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
992  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
993  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
994  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
995  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
996  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
997  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
998  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
999  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1000  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1001  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1002  ; PACKED:   $vgpr0 = COPY [[UV]](s32)
1003  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1004  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1005  %tex = extractvalue { half, i32 } %res, 0
1006  %tfe = extractvalue { half, i32 } %res, 1
1007  store i32 %tfe, i32 addrspace(1)* undef
1008  ret half %tex
1009}
1010
1011define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1012  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1013  ; UNPACKED: bb.1 (%ir-block.0):
1014  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1015  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1016  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1017  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1018  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1019  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1020  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1021  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1022  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1023  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1024  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1025  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1026  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1027  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1028  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1029  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1030  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1031  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1032  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1033  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1034  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1035  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1036  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1037  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1038  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1039  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1040  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1041  ; PACKED: bb.1 (%ir-block.0):
1042  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1043  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1044  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1045  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1046  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1047  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1048  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1049  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1050  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1051  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1052  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1053  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1054  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1055  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1056  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1057  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1058  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1059  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1060  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1061  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1062  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1063  %tex = extractvalue { <2 x half>, i32 } %res, 0
1064  %tfe = extractvalue { <2 x half>, i32 } %res, 1
1065  store i32 %tfe, i32 addrspace(1)* undef
1066  ret <2 x half> %tex
1067}
1068
1069define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1070  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1071  ; UNPACKED: bb.1 (%ir-block.0):
1072  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1073  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1074  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1075  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1076  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1077  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1078  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1079  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1080  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1081  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1082  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1083  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1084  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1085  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1086  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1087  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1088  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1089  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1090  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1091  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1092  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1093  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1094  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1095  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1096  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1097  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1098  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1099  ; PACKED: bb.1 (%ir-block.0):
1100  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1101  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1102  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1103  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1104  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1105  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1106  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1107  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1108  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1109  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1110  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1111  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1112  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1113  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1114  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1115  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1116  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1117  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1118  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1119  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
1120  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1121  %tex = extractvalue { <2 x half>, i32 } %res, 0
1122  %tfe = extractvalue { <2 x half>, i32 } %res, 1
1123  store i32 %tfe, i32 addrspace(1)* undef
1124  ret <2 x half> %tex
1125}
1126
1127define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1128  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1129  ; UNPACKED: bb.1 (%ir-block.0):
1130  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1131  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1132  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1133  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1134  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1135  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1136  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1137  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1138  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1139  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1140  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1141  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1142  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1143  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1144  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
1145  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1146  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1147  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1148  ; UNPACKED:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
1149  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
1150  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1151  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1152  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
1153  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
1154  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1155  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1156  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1157  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1158  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
1159  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
1160  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1161  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1162  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
1163  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
1164  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1165  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1166  ; PACKED: bb.1 (%ir-block.0):
1167  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1168  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1169  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1170  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1171  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1172  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1173  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1174  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1175  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1176  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1177  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1178  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1179  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1180  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1181  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
1182  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1183  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1184  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1185  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1186  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1187  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
1188  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1189  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1190  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1191  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1192  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
1193  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
1194  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1195  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1196  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1197  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1198  ; PACKED:   $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
1199  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1200  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1201  %tex = extractvalue { <3 x half>, i32 } %res, 0
1202  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1203  store i32 %tfe, i32 addrspace(1)* undef
1204  ret <3 x half> %tex
1205}
1206
1207define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1208  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1209  ; UNPACKED: bb.1 (%ir-block.0):
1210  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1211  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1212  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1213  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1214  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1215  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1216  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1217  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1218  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1219  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1220  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1221  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1222  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1223  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1224  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1225  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1226  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1227  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1228  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
1229  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1230  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1231  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1232  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
1233  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1234  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
1235  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1236  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1237  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
1238  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1239  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1240  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1241  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
1242  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
1243  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1244  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1245  ; PACKED: bb.1 (%ir-block.0):
1246  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1247  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1248  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1249  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1250  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1251  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1252  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1253  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1254  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1255  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1256  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1257  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1258  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1259  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1260  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1261  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1262  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1263  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1264  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1265  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1266  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
1267  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1268  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1269  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1270  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1271  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
1272  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
1273  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1274  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1275  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1276  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1277  ; PACKED:   $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
1278  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1279  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1280  %tex = extractvalue { <3 x half>, i32 } %res, 0
1281  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1282  store i32 %tfe, i32 addrspace(1)* undef
1283  ret <3 x half> %tex
1284}
1285
1286define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1287  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1288  ; UNPACKED: bb.1 (%ir-block.0):
1289  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1290  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1291  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1292  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1293  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1294  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1295  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1296  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1297  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1298  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1299  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1300  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1301  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1302  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1303  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1304  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1305  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1306  ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1307  ; UNPACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
1308  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1309  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1310  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1311  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
1312  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1313  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
1314  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1315  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1316  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
1317  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1318  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1319  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1320  ; UNPACKED:   $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
1321  ; UNPACKED:   $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
1322  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1323  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1324  ; PACKED: bb.1 (%ir-block.0):
1325  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1326  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1327  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1328  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1329  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1330  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1331  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1332  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1333  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1334  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1335  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1336  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1337  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1338  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1339  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1340  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1341  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1342  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1343  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1344  ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
1345  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
1346  ; PACKED:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
1347  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
1348  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1349  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1350  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
1351  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
1352  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
1353  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1354  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1355  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1356  ; PACKED:   $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
1357  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1358  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1359  %tex = extractvalue { <3 x half>, i32 } %res, 0
1360  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1361  store i32 %tfe, i32 addrspace(1)* undef
1362  ret <3 x half> %tex
1363}
1364
1365define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1366  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1367  ; UNPACKED: bb.1 (%ir-block.0):
1368  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1369  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1370  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1371  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1372  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1373  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1374  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1375  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1376  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1377  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1378  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1379  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1380  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1381  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1382  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
1383  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
1384  ; UNPACKED:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1385  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1386  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1387  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1388  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1389  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1390  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1391  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1392  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
1393  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1394  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1395  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1396  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1397  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1398  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1399  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1400  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1401  ; PACKED: bb.1 (%ir-block.0):
1402  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1403  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1404  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1405  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1406  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1407  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1408  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1409  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1410  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1411  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1412  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1413  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1414  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1415  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1416  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
1417  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1418  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1419  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
1420  ; PACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1421  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1422  ; PACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1423  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1424  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1425  %tex = extractvalue { <4 x half>, i32 } %res, 0
1426  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1427  store i32 %tfe, i32 addrspace(1)* undef
1428  ret <4 x half> %tex
1429}
1430
1431define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1432  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1433  ; UNPACKED: bb.1 (%ir-block.0):
1434  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1435  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1436  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1437  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1438  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1439  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1440  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1441  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1442  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1443  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1444  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1445  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1446  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1447  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1448  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
1449  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
1450  ; UNPACKED:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1451  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1452  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1453  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1454  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1455  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1456  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1457  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1458  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1459  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1460  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1461  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1462  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1463  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1464  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1465  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1466  ; PACKED: bb.1 (%ir-block.0):
1467  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1468  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1469  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1470  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1471  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1472  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1473  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1474  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1475  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1476  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1477  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1478  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1479  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1480  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1481  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
1482  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1483  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1484  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1485  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1486  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1487  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1488  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1489  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1490  %tex = extractvalue { <4 x half>, i32 } %res, 0
1491  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1492  store i32 %tfe, i32 addrspace(1)* undef
1493  ret <4 x half> %tex
1494}
1495
1496define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1497  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1498  ; UNPACKED: bb.1 (%ir-block.0):
1499  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1500  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1501  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1502  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1503  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1504  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1505  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1506  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1507  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1508  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1509  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1510  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1511  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1512  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1513  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1514  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1515  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1516  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1517  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1518  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1519  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1520  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1521  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1522  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1523  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1524  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1525  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1526  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1527  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1528  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1529  ; PACKED: bb.1 (%ir-block.0):
1530  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1531  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1532  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1533  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1534  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1535  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1536  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1537  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1538  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1539  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1540  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1541  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1542  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1543  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1544  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1545  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1546  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1547  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1548  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1549  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1550  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1551  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1552  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1553  %tex = extractvalue { <4 x half>, i32 } %res, 0
1554  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1555  store i32 %tfe, i32 addrspace(1)* undef
1556  ret <4 x half> %tex
1557}
1558
1559define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1560  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
1561  ; UNPACKED: bb.1 (%ir-block.0):
1562  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1563  ; UNPACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1564  ; UNPACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1565  ; UNPACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1566  ; UNPACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1567  ; UNPACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1568  ; UNPACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1569  ; UNPACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1570  ; UNPACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1571  ; UNPACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1572  ; UNPACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1573  ; UNPACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1574  ; UNPACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1575  ; UNPACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1576  ; UNPACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1577  ; UNPACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1578  ; UNPACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1579  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1580  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1581  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1582  ; UNPACKED:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1583  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1584  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1585  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1586  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1587  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1588  ; UNPACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1589  ; UNPACKED:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1590  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1591  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
1592  ; PACKED: bb.1 (%ir-block.0):
1593  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1594  ; PACKED:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1595  ; PACKED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1596  ; PACKED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1597  ; PACKED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1598  ; PACKED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1599  ; PACKED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1600  ; PACKED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1601  ; PACKED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1602  ; PACKED:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1603  ; PACKED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1604  ; PACKED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1605  ; PACKED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1606  ; PACKED:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1607  ; PACKED:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
1608  ; PACKED:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
1609  ; PACKED:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1610  ; PACKED:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1611  ; PACKED:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1612  ; PACKED:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1613  ; PACKED:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1614  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1615  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1616  %tex = extractvalue { <4 x half>, i32 } %res, 0
1617  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1618  store i32 %tfe, i32 addrspace(1)* undef
1619  ret <4 x half> %tex
1620}
1621
1622declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1623declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1624declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1625declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1626declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1627declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1628declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1629declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1630
1631attributes #0 = { nounwind readonly }
1632