1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
4
5define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
6  ; GFX9-LABEL: name: load_1d
7  ; GFX9: bb.1.main_body:
8  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
9  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
18  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
19  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
20  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
21  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
22  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
23  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
24  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
25  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
26  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
27  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
28  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
29  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
30  ; GFX10NSA-LABEL: name: load_1d
31  ; GFX10NSA: bb.1.main_body:
32  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
33  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
34  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
35  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
36  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
37  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
38  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
39  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
40  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
41  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
42  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
43  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
44  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
45  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
46  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
47  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
48  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
49  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
50  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
51  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
52  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
53  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
54main_body:
55  %s = extractelement <2 x i16> %coords, i32 0
56  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
57  ret <4 x float> %v
58}
59
60define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
61  ; GFX9-LABEL: name: load_2d
62  ; GFX9: bb.1.main_body:
63  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
64  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
65  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
66  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
67  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
68  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
69  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
70  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
71  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
72  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
73  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
74  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
75  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
76  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
77  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
78  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
79  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
80  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
81  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
82  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
83  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
84  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
85  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
86  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
87  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
88  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
89  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
90  ; GFX10NSA-LABEL: name: load_2d
91  ; GFX10NSA: bb.1.main_body:
92  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
93  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
94  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
95  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
96  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
97  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
98  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
99  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
100  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
101  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
102  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
103  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
104  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
105  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
106  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
107  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
108  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
109  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
110  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
111  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
112  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
113  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
114  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
115  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
116  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
117  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
118  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
119main_body:
120  %s = extractelement <2 x i16> %coords, i32 0
121  %t = extractelement <2 x i16> %coords, i32 1
122  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
123  ret <4 x float> %v
124}
125
126define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
127  ; GFX9-LABEL: name: load_3d
128  ; GFX9: bb.1.main_body:
129  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
130  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
131  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
132  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
133  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
134  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
135  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
136  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
137  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
138  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
139  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
140  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
141  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
142  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
143  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
144  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
145  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
146  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
147  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
148  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
149  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
150  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
151  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
152  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
153  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
154  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
155  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
156  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
157  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
158  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
159  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
160  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
161  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
162  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
163  ; GFX10NSA-LABEL: name: load_3d
164  ; GFX10NSA: bb.1.main_body:
165  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
166  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
167  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
168  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
169  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
170  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
171  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
172  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
173  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
174  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
175  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
176  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
177  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
178  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
179  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
180  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
181  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
182  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
183  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
184  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
185  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
186  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
187  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
188  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
189  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
190  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
191  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
192  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
193  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
194  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
195  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
196  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
197  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
198  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
199main_body:
200  %s = extractelement <2 x i16> %coords_lo, i32 0
201  %t = extractelement <2 x i16> %coords_lo, i32 1
202  %r = extractelement <2 x i16> %coords_hi, i32 0
203  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
204  ret <4 x float> %v
205}
206
207define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
208  ; GFX9-LABEL: name: load_cube
209  ; GFX9: bb.1.main_body:
210  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
211  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
212  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
213  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
214  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
215  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
216  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
217  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
218  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
219  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
220  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
221  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
222  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
223  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
224  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
225  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
226  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
227  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
228  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
229  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
230  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
231  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
232  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
233  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
234  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
235  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
236  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
237  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
238  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
239  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
240  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
241  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
242  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
243  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
244  ; GFX10NSA-LABEL: name: load_cube
245  ; GFX10NSA: bb.1.main_body:
246  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
247  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
248  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
249  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
250  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
251  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
252  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
253  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
254  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
255  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
256  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
257  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
258  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
259  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
260  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
261  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
262  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
263  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
264  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
265  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
266  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
267  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
268  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
269  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
270  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
271  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
272  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
273  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
274  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
275  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
276  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
277  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
278  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
279  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
280main_body:
281  %s = extractelement <2 x i16> %coords_lo, i32 0
282  %t = extractelement <2 x i16> %coords_lo, i32 1
283  %slice = extractelement <2 x i16> %coords_hi, i32 0
284  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
285  ret <4 x float> %v
286}
287
288define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
289  ; GFX9-LABEL: name: load_1darray
290  ; GFX9: bb.1.main_body:
291  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
292  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
293  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
294  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
295  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
296  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
297  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
298  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
299  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
300  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
301  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
302  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
303  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
304  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
305  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
306  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
307  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
308  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
309  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
310  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
311  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
312  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
313  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
314  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
315  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
316  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
317  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
318  ; GFX10NSA-LABEL: name: load_1darray
319  ; GFX10NSA: bb.1.main_body:
320  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
321  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
322  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
323  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
324  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
325  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
326  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
327  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
328  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
329  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
330  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
331  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
332  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
333  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
334  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
335  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
336  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
337  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
338  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
339  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
340  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
341  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
342  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
343  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
344  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
345  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
346  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
347main_body:
348  %s = extractelement <2 x i16> %coords, i32 0
349  %slice = extractelement <2 x i16> %coords, i32 1
350  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
351  ret <4 x float> %v
352}
353
354define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
355  ; GFX9-LABEL: name: load_2darray
356  ; GFX9: bb.1.main_body:
357  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
358  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
359  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
360  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
361  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
362  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
363  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
364  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
365  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
366  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
367  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
368  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
369  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
370  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
371  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
372  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
373  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
374  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
375  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
376  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
377  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
378  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
379  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
380  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
381  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
382  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
383  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
384  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
385  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
386  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
387  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
388  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
389  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
390  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
391  ; GFX10NSA-LABEL: name: load_2darray
392  ; GFX10NSA: bb.1.main_body:
393  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
394  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
395  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
396  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
397  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
398  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
399  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
400  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
401  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
402  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
403  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
404  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
405  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
406  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
407  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
408  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
409  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
410  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
411  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
412  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
413  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
414  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
415  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
416  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
417  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
418  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
419  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
420  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
421  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
422  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
423  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
424  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
425  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
426  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
427main_body:
428  %s = extractelement <2 x i16> %coords_lo, i32 0
429  %t = extractelement <2 x i16> %coords_lo, i32 1
430  %slice = extractelement <2 x i16> %coords_hi, i32 0
431  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
432  ret <4 x float> %v
433}
434
435define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
436  ; GFX9-LABEL: name: load_2dmsaa
437  ; GFX9: bb.1.main_body:
438  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
439  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
440  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
441  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
442  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
443  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
444  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
445  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
446  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
447  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
448  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
449  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
450  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
451  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
452  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
453  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
454  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
455  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
456  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
457  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
458  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
459  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
460  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
461  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
462  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
463  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
464  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
465  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
466  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
467  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
468  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
469  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
470  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
471  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
472  ; GFX10NSA-LABEL: name: load_2dmsaa
473  ; GFX10NSA: bb.1.main_body:
474  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
475  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
476  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
477  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
478  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
479  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
480  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
481  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
482  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
483  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
484  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
485  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
486  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
487  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
488  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
489  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
490  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
491  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
492  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
493  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
494  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
495  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
496  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
497  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
498  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
499  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
500  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
501  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
502  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
503  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
504  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
505  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
506  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
507  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
508main_body:
509  %s = extractelement <2 x i16> %coords_lo, i32 0
510  %t = extractelement <2 x i16> %coords_lo, i32 1
511  %fragid = extractelement <2 x i16> %coords_hi, i32 0
512  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
513  ret <4 x float> %v
514}
515
516define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
517  ; GFX9-LABEL: name: load_2darraymsaa
518  ; GFX9: bb.1.main_body:
519  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
520  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
521  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
522  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
523  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
524  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
525  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
526  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
527  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
528  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
529  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
530  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
531  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
532  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
533  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
534  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
535  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
536  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
537  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
538  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
539  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
540  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
541  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
542  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
543  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
544  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
545  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
546  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
547  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
548  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
549  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
550  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
551  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
552  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
553  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
554  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
555  ; GFX10NSA-LABEL: name: load_2darraymsaa
556  ; GFX10NSA: bb.1.main_body:
557  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
558  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
559  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
560  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
561  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
562  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
563  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
564  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
565  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
566  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
567  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
568  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
569  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
570  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
571  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
572  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
573  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
574  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
575  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
576  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
577  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
578  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
579  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
580  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
581  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
582  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
583  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
584  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
585  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
586  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
587  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
588  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
589  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
590  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
591  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
592  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
593main_body:
594  %s = extractelement <2 x i16> %coords_lo, i32 0
595  %t = extractelement <2 x i16> %coords_lo, i32 1
596  %slice = extractelement <2 x i16> %coords_hi, i32 0
597  %fragid = extractelement <2 x i16> %coords_hi, i32 1
598  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
599  ret <4 x float> %v
600}
601
602define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
603  ; GFX9-LABEL: name: load_mip_1d
604  ; GFX9: bb.1.main_body:
605  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
606  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
607  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
608  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
609  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
610  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
611  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
612  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
613  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
614  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
615  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
616  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
617  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
618  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
619  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
620  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
621  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
622  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
623  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
624  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
625  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
626  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
627  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
628  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
629  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
630  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
631  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
632  ; GFX10NSA-LABEL: name: load_mip_1d
633  ; GFX10NSA: bb.1.main_body:
634  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
635  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
636  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
637  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
638  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
639  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
640  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
641  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
642  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
643  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
644  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
645  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
646  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
647  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
648  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
649  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
650  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
651  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
652  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
653  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
654  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
655  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
656  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
657  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
658  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
659  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
660  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
661main_body:
662  %s = extractelement <2 x i16> %coords, i32 0
663  %mip = extractelement <2 x i16> %coords, i32 1
664  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
665  ret <4 x float> %v
666}
667
668define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
669  ; GFX9-LABEL: name: load_mip_2d
670  ; GFX9: bb.1.main_body:
671  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
672  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
673  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
674  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
675  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
676  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
677  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
678  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
679  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
680  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
681  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
682  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
683  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
684  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
685  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
686  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
687  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
688  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
689  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
690  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
691  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
692  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
693  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
694  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
695  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
696  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
697  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
698  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
699  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
700  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
701  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
702  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
703  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
704  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
705  ; GFX10NSA-LABEL: name: load_mip_2d
706  ; GFX10NSA: bb.1.main_body:
707  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
708  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
709  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
710  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
711  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
712  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
713  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
714  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
715  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
716  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
717  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
718  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
719  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
720  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
721  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
722  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
723  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
724  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
725  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
726  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
727  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
728  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
729  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
730  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
731  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
732  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
733  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
734  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
735  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
736  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
737  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
738  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
739  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
740  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
741main_body:
742  %s = extractelement <2 x i16> %coords_lo, i32 0
743  %t = extractelement <2 x i16> %coords_lo, i32 1
744  %mip = extractelement <2 x i16> %coords_hi, i32 0
745  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
746  ret <4 x float> %v
747}
748
749define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
750  ; GFX9-LABEL: name: load_mip_3d
751  ; GFX9: bb.1.main_body:
752  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
753  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
754  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
755  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
756  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
757  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
758  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
759  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
760  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
761  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
762  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
763  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
764  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
765  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
766  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
767  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
768  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
769  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
770  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
771  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
772  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
773  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
774  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
775  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
776  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
777  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
778  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
779  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
780  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
781  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
782  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
783  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
784  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
785  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
786  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
787  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
788  ; GFX10NSA-LABEL: name: load_mip_3d
789  ; GFX10NSA: bb.1.main_body:
790  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
791  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
792  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
793  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
794  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
795  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
796  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
797  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
798  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
799  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
800  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
801  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
802  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
803  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
804  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
805  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
806  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
807  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
808  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
809  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
810  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
811  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
812  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
813  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
814  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
815  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
816  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
817  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
818  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
819  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
820  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
821  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
822  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
823  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
824  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
825  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
826main_body:
827  %s = extractelement <2 x i16> %coords_lo, i32 0
828  %t = extractelement <2 x i16> %coords_lo, i32 1
829  %r = extractelement <2 x i16> %coords_hi, i32 0
830  %mip = extractelement <2 x i16> %coords_hi, i32 1
831  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
832  ret <4 x float> %v
833}
834
835define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
836  ; GFX9-LABEL: name: load_mip_cube
837  ; GFX9: bb.1.main_body:
838  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
839  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
840  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
841  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
842  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
843  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
844  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
845  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
846  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
847  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
848  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
849  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
850  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
851  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
852  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
853  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
854  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
855  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
856  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
857  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
858  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
859  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
860  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
861  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
862  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
863  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
864  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
865  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
866  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
867  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
868  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
869  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
870  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
871  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
872  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
873  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
874  ; GFX10NSA-LABEL: name: load_mip_cube
875  ; GFX10NSA: bb.1.main_body:
876  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
877  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
878  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
879  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
880  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
881  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
882  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
883  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
884  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
885  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
886  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
887  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
888  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
889  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
890  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
891  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
892  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
893  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
894  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
895  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
896  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
897  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
898  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
899  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
900  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
901  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
902  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
903  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
904  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
905  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
906  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
907  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
908  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
909  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
910  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
911  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
912main_body:
913  %s = extractelement <2 x i16> %coords_lo, i32 0
914  %t = extractelement <2 x i16> %coords_lo, i32 1
915  %slice = extractelement <2 x i16> %coords_hi, i32 0
916  %mip = extractelement <2 x i16> %coords_hi, i32 1
917  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
918  ret <4 x float> %v
919}
920
921define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
922  ; GFX9-LABEL: name: load_mip_1darray
923  ; GFX9: bb.1.main_body:
924  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
925  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
926  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
927  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
928  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
929  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
930  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
931  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
932  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
933  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
934  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
935  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
936  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
937  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
938  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
939  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
940  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
941  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
942  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
943  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
944  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
945  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
946  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
947  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
948  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
949  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
950  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
951  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
952  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
953  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
954  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
955  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
956  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
957  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
958  ; GFX10NSA-LABEL: name: load_mip_1darray
959  ; GFX10NSA: bb.1.main_body:
960  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
961  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
962  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
963  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
964  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
965  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
966  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
967  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
968  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
969  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
970  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
971  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
972  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
973  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
974  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
975  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
976  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
977  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
978  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
979  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
980  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
981  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
982  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
983  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
984  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
985  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
986  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
987  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
988  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
989  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
990  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
991  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
992  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
993  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
994main_body:
995  %s = extractelement <2 x i16> %coords_lo, i32 0
996  %slice = extractelement <2 x i16> %coords_lo, i32 1
997  %mip = extractelement <2 x i16> %coords_hi, i32 0
998  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
999  ret <4 x float> %v
1000}
1001
1002define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1003  ; GFX9-LABEL: name: load_mip_2darray
1004  ; GFX9: bb.1.main_body:
1005  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1006  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1007  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1008  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1009  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1010  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1011  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1012  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1013  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1014  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
1015  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
1016  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1017  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1018  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
1019  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1020  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
1021  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1022  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1023  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
1024  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1025  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
1026  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1027  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1028  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1029  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
1030  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1031  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1032  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
1033  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1034  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
1035  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
1036  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
1037  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
1038  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
1039  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
1040  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1041  ; GFX10NSA-LABEL: name: load_mip_2darray
1042  ; GFX10NSA: bb.1.main_body:
1043  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1044  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1045  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1046  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1047  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1048  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1049  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1050  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1051  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1052  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
1053  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
1054  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1055  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1056  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
1057  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1058  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
1059  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1060  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1061  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
1062  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1063  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
1064  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1065  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1066  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1067  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
1068  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1069  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1070  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
1071  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1072  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
1073  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
1074  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
1075  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
1076  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
1077  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
1078  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1079main_body:
1080  %s = extractelement <2 x i16> %coords_lo, i32 0
1081  %t = extractelement <2 x i16> %coords_lo, i32 1
1082  %slice = extractelement <2 x i16> %coords_hi, i32 0
1083  %mip = extractelement <2 x i16> %coords_hi, i32 1
1084  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1085  ret <4 x float> %v
1086}
1087
1088define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
1089  ; GFX9-LABEL: name: store_1d
1090  ; GFX9: bb.1.main_body:
1091  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1092  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1093  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1094  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1095  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1096  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1097  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1098  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1099  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1100  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1101  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1102  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1103  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1104  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1105  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1106  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1107  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1108  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1109  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1110  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
1111  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1112  ; GFX9:   S_ENDPGM 0
1113  ; GFX10NSA-LABEL: name: store_1d
1114  ; GFX10NSA: bb.1.main_body:
1115  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1116  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1117  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1118  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1119  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1120  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1121  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1122  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1123  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1124  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1125  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1126  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1127  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1128  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1129  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1130  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1131  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1132  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1133  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1134  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
1135  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1136  ; GFX10NSA:   S_ENDPGM 0
1137main_body:
1138  %s = extractelement <2 x i16> %coords, i32 0
1139  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1140  ret void
1141}
1142
1143define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
1144  ; GFX9-LABEL: name: store_2d
1145  ; GFX9: bb.1.main_body:
1146  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1147  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1148  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1149  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1150  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1151  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1152  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1153  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1154  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1155  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1156  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1157  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1158  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1159  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1160  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1161  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1162  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1163  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1164  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1165  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1166  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1167  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1168  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1169  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1170  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1171  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1172  ; GFX9:   S_ENDPGM 0
1173  ; GFX10NSA-LABEL: name: store_2d
1174  ; GFX10NSA: bb.1.main_body:
1175  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1176  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1177  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1178  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1179  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1180  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1181  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1182  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1183  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1184  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1185  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1186  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1187  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1188  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1189  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1190  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1191  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1192  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1193  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1194  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1195  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1196  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1197  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1198  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1199  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1200  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1201  ; GFX10NSA:   S_ENDPGM 0
1202main_body:
1203  %s = extractelement <2 x i16> %coords, i32 0
1204  %t = extractelement <2 x i16> %coords, i32 1
1205  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1206  ret void
1207}
1208
1209define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1210  ; GFX9-LABEL: name: store_3d
1211  ; GFX9: bb.1.main_body:
1212  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1213  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1214  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1215  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1216  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1217  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1218  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1219  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1220  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1221  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1222  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1223  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1224  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1225  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1226  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1227  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1228  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1229  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1230  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1231  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1232  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1233  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1234  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1235  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1236  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1237  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1238  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1239  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1240  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1241  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1242  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1243  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1244  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1245  ; GFX9:   S_ENDPGM 0
1246  ; GFX10NSA-LABEL: name: store_3d
1247  ; GFX10NSA: bb.1.main_body:
1248  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1249  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1250  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1251  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1252  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1253  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1254  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1255  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1256  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1257  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1258  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1259  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1260  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1261  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1262  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1263  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1264  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1265  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1266  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1267  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1268  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1269  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1270  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1271  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1272  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1273  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1274  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1275  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1276  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1277  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1278  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1279  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1280  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1281  ; GFX10NSA:   S_ENDPGM 0
1282main_body:
1283  %s = extractelement <2 x i16> %coords_lo, i32 0
1284  %t = extractelement <2 x i16> %coords_lo, i32 1
1285  %r = extractelement <2 x i16> %coords_hi, i32 0
1286  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1287  ret void
1288}
1289
1290define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1291  ; GFX9-LABEL: name: store_cube
1292  ; GFX9: bb.1.main_body:
1293  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1294  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1295  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1296  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1297  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1298  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1299  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1300  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1301  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1302  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1303  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1304  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1305  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1306  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1307  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1308  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1309  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1310  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1311  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1312  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1313  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1314  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1315  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1316  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1317  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1318  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1319  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1320  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1321  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1322  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1323  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1324  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1325  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1326  ; GFX9:   S_ENDPGM 0
1327  ; GFX10NSA-LABEL: name: store_cube
1328  ; GFX10NSA: bb.1.main_body:
1329  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1330  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1331  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1332  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1333  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1334  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1335  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1336  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1337  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1338  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1339  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1340  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1341  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1342  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1343  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1344  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1345  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1346  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1347  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1348  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1349  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1350  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1351  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1352  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1353  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1354  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1355  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1356  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1357  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1358  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1359  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1360  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1361  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1362  ; GFX10NSA:   S_ENDPGM 0
1363main_body:
1364  %s = extractelement <2 x i16> %coords_lo, i32 0
1365  %t = extractelement <2 x i16> %coords_lo, i32 1
1366  %slice = extractelement <2 x i16> %coords_hi, i32 0
1367  call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1368  ret void
1369}
1370
1371define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
1372  ; GFX9-LABEL: name: store_1darray
1373  ; GFX9: bb.1.main_body:
1374  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1375  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1376  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1377  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1378  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1379  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1380  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1381  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1382  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1383  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1384  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1385  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1386  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1387  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1388  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1389  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1390  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1391  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1392  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1393  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1394  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1395  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1396  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1397  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1398  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1399  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1400  ; GFX9:   S_ENDPGM 0
1401  ; GFX10NSA-LABEL: name: store_1darray
1402  ; GFX10NSA: bb.1.main_body:
1403  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1404  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1405  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1406  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1407  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1408  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1409  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1410  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1411  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1412  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1413  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1414  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1415  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1416  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1417  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1418  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1419  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1420  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1421  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1422  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1423  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1424  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1425  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1426  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1427  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1428  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1429  ; GFX10NSA:   S_ENDPGM 0
1430main_body:
1431  %s = extractelement <2 x i16> %coords, i32 0
1432  %slice = extractelement <2 x i16> %coords, i32 1
1433  call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1434  ret void
1435}
1436
1437define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1438  ; GFX9-LABEL: name: store_2darray
1439  ; GFX9: bb.1.main_body:
1440  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1441  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1442  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1443  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1444  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1445  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1446  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1447  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1448  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1449  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1450  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1451  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1452  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1453  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1454  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1455  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1456  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1457  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1458  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1459  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1460  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1461  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1462  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1463  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1464  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1465  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1466  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1467  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1468  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1469  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1470  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1471  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1472  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1473  ; GFX9:   S_ENDPGM 0
1474  ; GFX10NSA-LABEL: name: store_2darray
1475  ; GFX10NSA: bb.1.main_body:
1476  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1477  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1478  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1479  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1480  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1481  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1482  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1483  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1484  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1485  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1486  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1487  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1488  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1489  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1490  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1491  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1492  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1493  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1494  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1495  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1496  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1497  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1498  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1499  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1500  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1501  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1502  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1503  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1504  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1505  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1506  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1507  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1508  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1509  ; GFX10NSA:   S_ENDPGM 0
1510main_body:
1511  %s = extractelement <2 x i16> %coords_lo, i32 0
1512  %t = extractelement <2 x i16> %coords_lo, i32 1
1513  %slice = extractelement <2 x i16> %coords_hi, i32 0
1514  call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1515  ret void
1516}
1517
1518define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1519  ; GFX9-LABEL: name: store_2dmsaa
1520  ; GFX9: bb.1.main_body:
1521  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1522  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1523  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1524  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1525  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1526  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1527  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1528  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1529  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1530  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1531  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1532  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1533  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1534  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1535  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1536  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1537  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1538  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1539  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1540  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1541  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1542  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1543  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1544  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1545  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1546  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1547  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1548  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1549  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1550  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1551  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1552  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1553  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1554  ; GFX9:   S_ENDPGM 0
1555  ; GFX10NSA-LABEL: name: store_2dmsaa
1556  ; GFX10NSA: bb.1.main_body:
1557  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1558  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1559  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1560  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1561  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1562  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1563  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1564  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1565  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1566  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1567  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1568  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1569  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1570  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1571  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1572  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1573  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1574  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1575  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1576  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1577  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1578  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1579  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1580  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1581  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1582  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1583  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1584  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1585  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1586  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1587  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1588  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1589  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1590  ; GFX10NSA:   S_ENDPGM 0
1591main_body:
1592  %s = extractelement <2 x i16> %coords_lo, i32 0
1593  %t = extractelement <2 x i16> %coords_lo, i32 1
1594  %fragid = extractelement <2 x i16> %coords_hi, i32 0
1595  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1596  ret void
1597}
1598
1599define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1600  ; GFX9-LABEL: name: store_2darraymsaa
1601  ; GFX9: bb.1.main_body:
1602  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1603  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1604  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1605  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1606  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1607  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1608  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1609  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1610  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1611  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1612  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1613  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1614  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1615  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1616  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1617  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1618  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1619  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1620  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1621  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1622  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1623  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1624  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1625  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1626  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1627  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1628  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1629  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1630  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1631  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1632  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1633  ; GFX9:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1634  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1635  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1636  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1637  ; GFX9:   S_ENDPGM 0
1638  ; GFX10NSA-LABEL: name: store_2darraymsaa
1639  ; GFX10NSA: bb.1.main_body:
1640  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1641  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1642  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1643  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1644  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1645  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1646  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1647  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1648  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1649  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1650  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1651  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1652  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1653  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1654  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1655  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1656  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1657  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1658  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1659  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1660  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1661  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1662  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1663  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1664  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1665  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1666  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1667  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1668  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1669  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1670  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1671  ; GFX10NSA:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1672  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1673  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1674  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1675  ; GFX10NSA:   S_ENDPGM 0
1676main_body:
1677  %s = extractelement <2 x i16> %coords_lo, i32 0
1678  %t = extractelement <2 x i16> %coords_lo, i32 1
1679  %slice = extractelement <2 x i16> %coords_hi, i32 0
1680  %fragid = extractelement <2 x i16> %coords_hi, i32 1
1681  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1682  ret void
1683}
1684
1685define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
1686  ; GFX9-LABEL: name: store_mip_1d
1687  ; GFX9: bb.1.main_body:
1688  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1689  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1690  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1691  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1692  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1693  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1694  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1695  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1696  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1697  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1698  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1699  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1700  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1701  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1702  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1703  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1704  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1705  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1706  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1707  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1708  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1709  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1710  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1711  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1712  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1713  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1714  ; GFX9:   S_ENDPGM 0
1715  ; GFX10NSA-LABEL: name: store_mip_1d
1716  ; GFX10NSA: bb.1.main_body:
1717  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1718  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1719  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1720  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1721  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1722  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1723  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1724  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1725  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1726  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1727  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1728  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1729  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1730  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1731  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1732  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1733  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1734  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1735  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1736  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1737  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1738  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1739  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1740  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1741  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
1742  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1743  ; GFX10NSA:   S_ENDPGM 0
1744main_body:
1745  %s = extractelement <2 x i16> %coords, i32 0
1746  %mip = extractelement <2 x i16> %coords, i32 1
1747  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1748  ret void
1749}
1750
1751define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1752  ; GFX9-LABEL: name: store_mip_2d
1753  ; GFX9: bb.1.main_body:
1754  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1755  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1756  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1757  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1758  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1759  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1760  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1761  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1762  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1763  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1764  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1765  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1766  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1767  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1768  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1769  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1770  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1771  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1772  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1773  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1774  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1775  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1776  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1777  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1778  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1779  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1780  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1781  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1782  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1783  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1784  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1785  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1786  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1787  ; GFX9:   S_ENDPGM 0
1788  ; GFX10NSA-LABEL: name: store_mip_2d
1789  ; GFX10NSA: bb.1.main_body:
1790  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1791  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1792  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1793  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1794  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1795  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1796  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1797  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1798  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1799  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1800  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1801  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1802  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1803  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1804  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1805  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1806  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1807  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1808  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1809  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1810  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1811  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1812  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1813  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1814  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1815  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1816  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1817  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1818  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1819  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1820  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
1821  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1822  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1823  ; GFX10NSA:   S_ENDPGM 0
1824main_body:
1825  %s = extractelement <2 x i16> %coords_lo, i32 0
1826  %t = extractelement <2 x i16> %coords_lo, i32 1
1827  %mip = extractelement <2 x i16> %coords_hi, i32 0
1828  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1829  ret void
1830}
1831
1832define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1833  ; GFX9-LABEL: name: store_mip_3d
1834  ; GFX9: bb.1.main_body:
1835  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1836  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1837  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1838  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1839  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1840  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1841  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1842  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1843  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1844  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1845  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1846  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1847  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1848  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1849  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1850  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1851  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1852  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1853  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1854  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1855  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1856  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1857  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1858  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1859  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1860  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1861  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1862  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1863  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1864  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1865  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1866  ; GFX9:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1867  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1868  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1869  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1870  ; GFX9:   S_ENDPGM 0
1871  ; GFX10NSA-LABEL: name: store_mip_3d
1872  ; GFX10NSA: bb.1.main_body:
1873  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1874  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1875  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1876  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1877  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1878  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1879  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1880  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1881  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1882  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1883  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1884  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1885  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1886  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1887  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1888  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1889  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1890  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1891  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1892  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1893  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1894  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1895  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1896  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1897  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1898  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1899  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1900  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1901  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1902  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1903  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1904  ; GFX10NSA:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1905  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1906  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1907  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1908  ; GFX10NSA:   S_ENDPGM 0
1909main_body:
1910  %s = extractelement <2 x i16> %coords_lo, i32 0
1911  %t = extractelement <2 x i16> %coords_lo, i32 1
1912  %r = extractelement <2 x i16> %coords_hi, i32 0
1913  %mip = extractelement <2 x i16> %coords_hi, i32 1
1914  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1915  ret void
1916}
1917
1918define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1919  ; GFX9-LABEL: name: store_mip_cube
1920  ; GFX9: bb.1.main_body:
1921  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1922  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1923  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1924  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1925  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1926  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1927  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1928  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1929  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1930  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1931  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1932  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1933  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1934  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1935  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1936  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1937  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1938  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1939  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1940  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1941  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1942  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1943  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1944  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1945  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1946  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1947  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1948  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1949  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1950  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1951  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1952  ; GFX9:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1953  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1954  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1955  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1956  ; GFX9:   S_ENDPGM 0
1957  ; GFX10NSA-LABEL: name: store_mip_cube
1958  ; GFX10NSA: bb.1.main_body:
1959  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1960  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1961  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1962  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1963  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1964  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1965  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1966  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1967  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1968  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1969  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1970  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1971  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1972  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1973  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1974  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1975  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1976  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1977  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1978  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1979  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1980  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1981  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1982  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1983  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1984  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1985  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1986  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
1987  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
1988  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
1989  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
1990  ; GFX10NSA:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
1991  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
1992  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1993  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
1994  ; GFX10NSA:   S_ENDPGM 0
1995main_body:
1996  %s = extractelement <2 x i16> %coords_lo, i32 0
1997  %t = extractelement <2 x i16> %coords_lo, i32 1
1998  %slice = extractelement <2 x i16> %coords_hi, i32 0
1999  %mip = extractelement <2 x i16> %coords_hi, i32 1
2000  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2001  ret void
2002}
2003
2004define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
2005  ; GFX9-LABEL: name: store_mip_1darray
2006  ; GFX9: bb.1.main_body:
2007  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
2008  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2009  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2010  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2011  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2012  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2013  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2014  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2015  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2016  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2017  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2018  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
2019  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
2020  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
2021  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
2022  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2023  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
2024  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2025  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2026  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2027  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2028  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
2029  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
2030  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
2031  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
2032  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
2033  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
2034  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
2035  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
2036  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
2037  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
2038  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
2039  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
2040  ; GFX9:   S_ENDPGM 0
2041  ; GFX10NSA-LABEL: name: store_mip_1darray
2042  ; GFX10NSA: bb.1.main_body:
2043  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
2044  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2045  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2046  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2047  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2048  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2049  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2050  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2051  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2052  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2053  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2054  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
2055  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
2056  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
2057  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
2058  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2059  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
2060  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2061  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2062  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2063  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2064  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
2065  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
2066  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
2067  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
2068  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
2069  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
2070  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
2071  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
2072  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
2073  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
2074  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
2075  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
2076  ; GFX10NSA:   S_ENDPGM 0
2077main_body:
2078  %s = extractelement <2 x i16> %coords_lo, i32 0
2079  %slice = extractelement <2 x i16> %coords_lo, i32 1
2080  %mip = extractelement <2 x i16> %coords_hi, i32 0
2081  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2082  ret void
2083}
2084
2085define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
2086  ; GFX9-LABEL: name: store_mip_2darray
2087  ; GFX9: bb.1.main_body:
2088  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
2089  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2090  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2091  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2092  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2093  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2094  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2095  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2096  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2097  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2098  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2099  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
2100  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
2101  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
2102  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
2103  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2104  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
2105  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2106  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2107  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2108  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2109  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
2110  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
2111  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
2112  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
2113  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
2114  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
2115  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
2116  ; GFX9:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
2117  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
2118  ; GFX9:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
2119  ; GFX9:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
2120  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
2121  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
2122  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
2123  ; GFX9:   S_ENDPGM 0
2124  ; GFX10NSA-LABEL: name: store_mip_2darray
2125  ; GFX10NSA: bb.1.main_body:
2126  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
2127  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2128  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2129  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2130  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2131  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2132  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2133  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2134  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2135  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2136  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2137  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
2138  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
2139  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
2140  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
2141  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2142  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
2143  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2144  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2145  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2146  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2147  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
2148  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
2149  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
2150  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
2151  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
2152  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
2153  ; GFX10NSA:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
2154  ; GFX10NSA:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
2155  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
2156  ; GFX10NSA:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
2157  ; GFX10NSA:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
2158  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
2159  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
2160  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
2161  ; GFX10NSA:   S_ENDPGM 0
2162main_body:
2163  %s = extractelement <2 x i16> %coords_lo, i32 0
2164  %t = extractelement <2 x i16> %coords_lo, i32 1
2165  %slice = extractelement <2 x i16> %coords_hi, i32 0
2166  %mip = extractelement <2 x i16> %coords_hi, i32 1
2167  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2168  ret void
2169}
2170
2171define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2172  ; GFX9-LABEL: name: getresinfo_1d
2173  ; GFX9: bb.1.main_body:
2174  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2175  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2176  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2177  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2178  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2179  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2180  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2181  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2182  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2183  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2184  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2185  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2186  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2187  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2188  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2189  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2190  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2191  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2192  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2193  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2194  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2195  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2196  ; GFX10NSA-LABEL: name: getresinfo_1d
2197  ; GFX10NSA: bb.1.main_body:
2198  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2199  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2200  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2201  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2202  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2203  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2204  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2205  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2206  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2207  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2208  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2209  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2210  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2211  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2212  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2213  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2214  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2215  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2216  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2217  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2218  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2219  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2220main_body:
2221  %mip = extractelement <2 x i16> %coords, i32 0
2222  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2223  ret <4 x float> %v
2224}
2225
2226define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2227  ; GFX9-LABEL: name: getresinfo_2d
2228  ; GFX9: bb.1.main_body:
2229  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2230  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2231  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2232  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2233  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2234  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2235  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2236  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2237  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2238  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2239  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2240  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2241  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2242  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2243  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2244  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2245  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2246  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2247  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2248  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2249  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2250  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2251  ; GFX10NSA-LABEL: name: getresinfo_2d
2252  ; GFX10NSA: bb.1.main_body:
2253  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2254  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2255  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2256  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2257  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2258  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2259  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2260  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2261  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2262  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2263  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2264  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2265  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2266  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2267  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2268  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2269  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2270  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2271  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2272  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2273  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2274  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2275main_body:
2276  %mip = extractelement <2 x i16> %coords, i32 0
2277  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2278  ret <4 x float> %v
2279}
2280
2281define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2282  ; GFX9-LABEL: name: getresinfo_3d
2283  ; GFX9: bb.1.main_body:
2284  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2285  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2286  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2287  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2288  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2289  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2290  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2291  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2292  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2293  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2294  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2295  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2296  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2297  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2298  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2299  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2300  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2301  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2302  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2303  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2304  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2305  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2306  ; GFX10NSA-LABEL: name: getresinfo_3d
2307  ; GFX10NSA: bb.1.main_body:
2308  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2309  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2310  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2311  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2312  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2313  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2314  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2315  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2316  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2317  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2318  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2319  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2320  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2321  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2322  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2323  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2324  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2325  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2326  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2327  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2328  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2329  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2330main_body:
2331  %mip = extractelement <2 x i16> %coords, i32 0
2332  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2333  ret <4 x float> %v
2334}
2335
2336define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2337  ; GFX9-LABEL: name: getresinfo_cube
2338  ; GFX9: bb.1.main_body:
2339  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2340  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2341  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2342  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2343  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2344  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2345  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2346  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2347  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2348  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2349  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2350  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2351  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2352  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2353  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2354  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2355  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2356  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2357  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2358  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2359  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2360  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2361  ; GFX10NSA-LABEL: name: getresinfo_cube
2362  ; GFX10NSA: bb.1.main_body:
2363  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2364  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2365  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2366  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2367  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2368  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2369  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2370  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2371  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2372  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2373  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2374  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2375  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2376  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2377  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2378  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2379  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2380  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2381  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2382  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2383  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2384  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2385main_body:
2386  %mip = extractelement <2 x i16> %coords, i32 0
2387  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2388  ret <4 x float> %v
2389}
2390
2391define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2392  ; GFX9-LABEL: name: getresinfo_1darray
2393  ; GFX9: bb.1.main_body:
2394  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2395  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2396  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2397  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2398  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2399  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2400  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2401  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2402  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2403  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2404  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2405  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2406  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2407  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2408  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2409  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2410  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2411  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2412  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2413  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2414  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2415  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2416  ; GFX10NSA-LABEL: name: getresinfo_1darray
2417  ; GFX10NSA: bb.1.main_body:
2418  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2419  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2420  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2421  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2422  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2423  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2424  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2425  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2426  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2427  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2428  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2429  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2430  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2431  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2432  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2433  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2434  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2435  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2436  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2437  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2438  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2439  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2440main_body:
2441  %mip = extractelement <2 x i16> %coords, i32 0
2442  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2443  ret <4 x float> %v
2444}
2445
2446define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2447  ; GFX9-LABEL: name: getresinfo_2darray
2448  ; GFX9: bb.1.main_body:
2449  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2450  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2451  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2452  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2453  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2454  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2455  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2456  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2457  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2458  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2459  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2460  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2461  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2462  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2463  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2464  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2465  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2466  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2467  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2468  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2469  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2470  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2471  ; GFX10NSA-LABEL: name: getresinfo_2darray
2472  ; GFX10NSA: bb.1.main_body:
2473  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2474  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2475  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2476  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2477  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2478  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2479  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2480  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2481  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2482  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2483  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2484  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2485  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2486  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2487  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2488  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2489  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2490  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2491  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2492  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2493  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2494  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2495main_body:
2496  %mip = extractelement <2 x i16> %coords, i32 0
2497  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2498  ret <4 x float> %v
2499}
2500
2501define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2502  ; GFX9-LABEL: name: getresinfo_2dmsaa
2503  ; GFX9: bb.1.main_body:
2504  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2505  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2506  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2507  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2508  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2509  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2510  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2511  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2512  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2513  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2514  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2515  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2516  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2517  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2518  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2519  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2520  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2521  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2522  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2523  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2524  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2525  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2526  ; GFX10NSA-LABEL: name: getresinfo_2dmsaa
2527  ; GFX10NSA: bb.1.main_body:
2528  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2529  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2530  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2531  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2532  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2533  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2534  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2535  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2536  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2537  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2538  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2539  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2540  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2541  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2542  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2543  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2544  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2545  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2546  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2547  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2548  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2549  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2550main_body:
2551  %mip = extractelement <2 x i16> %coords, i32 0
2552  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2553  ret <4 x float> %v
2554}
2555
2556define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2557  ; GFX9-LABEL: name: getresinfo_2darraymsaa
2558  ; GFX9: bb.1.main_body:
2559  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2560  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2561  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2562  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2563  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2564  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2565  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2566  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2567  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2568  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2569  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2570  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2571  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2572  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2573  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2574  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2575  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2576  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2577  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2578  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2579  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2580  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2581  ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa
2582  ; GFX10NSA: bb.1.main_body:
2583  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2584  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2585  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2586  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2587  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2588  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2589  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2590  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2591  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2592  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2593  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2594  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2595  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2596  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2597  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2598  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2599  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2600  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2601  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2602  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2603  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2604  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2605main_body:
2606  %mip = extractelement <2 x i16> %coords, i32 0
2607  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2608  ret <4 x float> %v
2609}
2610
2611define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2612  ; GFX9-LABEL: name: load_1d_V1
2613  ; GFX9: bb.1.main_body:
2614  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2615  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2616  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2617  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2618  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2619  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2620  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2621  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2622  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2623  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2624  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2625  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2626  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2627  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2628  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2629  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8")
2630  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
2631  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
2632  ; GFX10NSA-LABEL: name: load_1d_V1
2633  ; GFX10NSA: bb.1.main_body:
2634  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2635  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2636  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2637  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2638  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2639  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2640  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2641  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2642  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2643  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2644  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2645  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2646  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2647  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2648  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2649  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8")
2650  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
2651  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
2652main_body:
2653  %s = extractelement <2 x i16> %coords, i32 0
2654  %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
2655  ret float %v
2656}
2657
2658define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2659  ; GFX9-LABEL: name: load_1d_V2
2660  ; GFX9: bb.1.main_body:
2661  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2662  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2663  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2664  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2665  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2666  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2667  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2668  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2669  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2670  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2671  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2672  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2673  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2674  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2675  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2676  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8")
2677  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
2678  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2679  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2680  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
2681  ; GFX10NSA-LABEL: name: load_1d_V2
2682  ; GFX10NSA: bb.1.main_body:
2683  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2684  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2685  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2686  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2687  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2688  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2689  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2690  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2691  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2692  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2693  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2694  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2695  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2696  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2697  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2698  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8")
2699  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
2700  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2701  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2702  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
2703main_body:
2704  %s = extractelement <2 x i16> %coords, i32 0
2705  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
2706  ret <2 x float> %v
2707}
2708
2709define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
2710  ; GFX9-LABEL: name: store_1d_V1
2711  ; GFX9: bb.1.main_body:
2712  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
2713  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2714  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2715  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2716  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2717  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2718  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2719  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2720  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2721  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2722  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
2723  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2724  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2725  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
2726  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2727  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2728  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8")
2729  ; GFX9:   S_ENDPGM 0
2730  ; GFX10NSA-LABEL: name: store_1d_V1
2731  ; GFX10NSA: bb.1.main_body:
2732  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
2733  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2734  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2735  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2736  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2737  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2738  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2739  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2740  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2741  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2742  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
2743  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2744  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2745  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
2746  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2747  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2748  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8")
2749  ; GFX10NSA:   S_ENDPGM 0
2750main_body:
2751  %s = extractelement <2 x i16> %coords, i32 0
2752  call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
2753  ret void
2754}
2755
2756define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
2757  ; GFX9-LABEL: name: store_1d_V2
2758  ; GFX9: bb.1.main_body:
2759  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
2760  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2761  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2762  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2763  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2764  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2765  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2766  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2767  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2768  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2769  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2770  ; GFX9:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
2771  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2772  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
2773  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2774  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
2775  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2776  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2777  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8")
2778  ; GFX9:   S_ENDPGM 0
2779  ; GFX10NSA-LABEL: name: store_1d_V2
2780  ; GFX10NSA: bb.1.main_body:
2781  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
2782  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2783  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2784  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2785  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2786  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2787  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2788  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2789  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2790  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2791  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2792  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
2793  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2794  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
2795  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2796  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
2797  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2798  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2799  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8")
2800  ; GFX10NSA:   S_ENDPGM 0
2801main_body:
2802  %s = extractelement <2 x i16> %coords, i32 0
2803  call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
2804  ret void
2805}
2806
2807define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2808  ; GFX9-LABEL: name: load_1d_glc
2809  ; GFX9: bb.1.main_body:
2810  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2811  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2812  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2813  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2814  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2815  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2816  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2817  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2818  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2819  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2820  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2821  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2822  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2823  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2824  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2825  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
2826  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2827  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2828  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2829  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2830  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2831  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2832  ; GFX10NSA-LABEL: name: load_1d_glc
2833  ; GFX10NSA: bb.1.main_body:
2834  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2835  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2836  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2837  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2838  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2839  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2840  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2841  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2842  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2843  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2844  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2845  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2846  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2847  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2848  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2849  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
2850  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2851  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2852  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2853  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2854  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2855  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2856main_body:
2857  %s = extractelement <2 x i16> %coords, i32 0
2858  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
2859  ret <4 x float> %v
2860}
2861
2862define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2863  ; GFX9-LABEL: name: load_1d_slc
2864  ; GFX9: bb.1.main_body:
2865  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2866  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2867  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2868  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2869  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2870  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2871  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2872  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2873  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2874  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2875  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2876  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2877  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2878  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2879  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2880  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
2881  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2882  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2883  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2884  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2885  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2886  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2887  ; GFX10NSA-LABEL: name: load_1d_slc
2888  ; GFX10NSA: bb.1.main_body:
2889  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2890  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2891  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2892  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2893  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2894  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2895  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2896  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2897  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2898  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2899  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2900  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2901  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2902  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2903  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2904  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
2905  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2906  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2907  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2908  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2909  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2910  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2911main_body:
2912  %s = extractelement <2 x i16> %coords, i32 0
2913  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
2914  ret <4 x float> %v
2915}
2916
2917define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2918  ; GFX9-LABEL: name: load_1d_glc_slc
2919  ; GFX9: bb.1.main_body:
2920  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2921  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2922  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2923  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2924  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2925  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2926  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2927  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2928  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2929  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2930  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2931  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2932  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2933  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2934  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2935  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
2936  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2937  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2938  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2939  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2940  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2941  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2942  ; GFX10NSA-LABEL: name: load_1d_glc_slc
2943  ; GFX10NSA: bb.1.main_body:
2944  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2945  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2946  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2947  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2948  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2949  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2950  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2951  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2952  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2953  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2954  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2955  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2956  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2957  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2958  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2959  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
2960  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2961  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2962  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2963  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2964  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2965  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2966main_body:
2967  %s = extractelement <2 x i16> %coords, i32 0
2968  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
2969  ret <4 x float> %v
2970}
2971
2972define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
2973  ; GFX9-LABEL: name: store_1d_glc
2974  ; GFX9: bb.1.main_body:
2975  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
2976  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2977  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2978  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2979  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2980  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2981  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2982  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2983  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2984  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
2985  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
2986  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
2987  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
2988  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
2989  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2990  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
2991  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2992  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
2993  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2994  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2995  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
2996  ; GFX9:   S_ENDPGM 0
2997  ; GFX10NSA-LABEL: name: store_1d_glc
2998  ; GFX10NSA: bb.1.main_body:
2999  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3000  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3001  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3002  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3003  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3004  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3005  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3006  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3007  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3008  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
3009  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
3010  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
3011  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
3012  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
3013  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3014  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
3015  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3016  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
3017  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3018  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3019  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
3020  ; GFX10NSA:   S_ENDPGM 0
3021main_body:
3022  %s = extractelement <2 x i16> %coords, i32 0
3023  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
3024  ret void
3025}
3026
3027define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
3028  ; GFX9-LABEL: name: store_1d_slc
3029  ; GFX9: bb.1.main_body:
3030  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3031  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3032  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3033  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3034  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3035  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3036  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3037  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3038  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3039  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
3040  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
3041  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
3042  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
3043  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
3044  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3045  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
3046  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3047  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
3048  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3049  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3050  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
3051  ; GFX9:   S_ENDPGM 0
3052  ; GFX10NSA-LABEL: name: store_1d_slc
3053  ; GFX10NSA: bb.1.main_body:
3054  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3055  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3056  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3057  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3058  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3059  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3060  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3061  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3062  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3063  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
3064  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
3065  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
3066  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
3067  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
3068  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3069  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
3070  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3071  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
3072  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3073  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3074  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
3075  ; GFX10NSA:   S_ENDPGM 0
3076main_body:
3077  %s = extractelement <2 x i16> %coords, i32 0
3078  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
3079  ret void
3080}
3081
3082define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
3083  ; GFX9-LABEL: name: store_1d_glc_slc
3084  ; GFX9: bb.1.main_body:
3085  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3086  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3087  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3088  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3089  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3090  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3091  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3092  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3093  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3094  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
3095  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
3096  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
3097  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
3098  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
3099  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3100  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
3101  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3102  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
3103  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3104  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3105  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
3106  ; GFX9:   S_ENDPGM 0
3107  ; GFX10NSA-LABEL: name: store_1d_glc_slc
3108  ; GFX10NSA: bb.1.main_body:
3109  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3110  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3111  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3112  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3113  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3114  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3115  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3116  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3117  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3118  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
3119  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
3120  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
3121  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
3122  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
3123  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3124  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
3125  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3126  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
3127  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3128  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3129  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
3130  ; GFX10NSA:   S_ENDPGM 0
3131main_body:
3132  %s = extractelement <2 x i16> %coords, i32 0
3133  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
3134  ret void
3135}
3136
3137define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
3138  ; GFX9-LABEL: name: getresinfo_dmask0
3139  ; GFX9: bb.1.main_body:
3140  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
3141  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3142  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3143  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3144  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3145  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3146  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3147  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3148  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3149  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3150  ; GFX9:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
3151  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
3152  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
3153  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
3154  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
3155  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
3156  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3157  ; GFX10NSA-LABEL: name: getresinfo_dmask0
3158  ; GFX10NSA: bb.1.main_body:
3159  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
3160  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3161  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3162  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3163  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3164  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3165  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3166  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3167  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3168  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3169  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
3170  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
3171  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
3172  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
3173  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
3174  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
3175  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3176main_body:
3177  %mip = extractelement <2 x i16> %coords, i32 0
3178  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
3179  ret <4 x float> %r
3180}
3181
3182define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
3183  ; GFX9-LABEL: name: load_1d_tfe
3184  ; GFX9: bb.1.main_body:
3185  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
3186  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3187  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3188  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3189  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3190  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3191  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3192  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3193  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3194  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3195  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3196  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3197  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3198  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3199  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3200  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3201  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3202  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3203  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3204  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
3205  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
3206  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
3207  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
3208  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3209  ; GFX10NSA-LABEL: name: load_1d_tfe
3210  ; GFX10NSA: bb.1.main_body:
3211  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
3212  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3213  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3214  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3215  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3216  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3217  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3218  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3219  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3220  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3221  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3222  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3223  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3224  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3225  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3226  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
3227  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3228  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3229  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3230  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
3231  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
3232  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
3233  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
3234  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3235main_body:
3236  %s = extractelement <2 x i16> %coords, i32 0
3237  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0)
3238  %data = extractvalue { <4 x float>, i32 } %v, 0
3239  %tfe = extractvalue { <4 x float>, i32 } %v, 1
3240  store i32 %tfe, i32 addrspace(1)* undef
3241  ret <4 x float> %data
3242}
3243
3244define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
3245  ; GFX9-LABEL: name: load_2d_tfe
3246  ; GFX9: bb.1.main_body:
3247  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
3248  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3249  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3250  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3251  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3252  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3253  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3254  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3255  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3256  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3257  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3258  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3259  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3260  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3261  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3262  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3263  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
3264  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
3265  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
3266  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
3267  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
3268  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3269  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3270  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3271  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
3272  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
3273  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
3274  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
3275  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3276  ; GFX10NSA-LABEL: name: load_2d_tfe
3277  ; GFX10NSA: bb.1.main_body:
3278  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
3279  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3280  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3281  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3282  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3283  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3284  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3285  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3286  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3287  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3288  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3289  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3290  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3291  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3292  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3293  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3294  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
3295  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
3296  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
3297  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
3298  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
3299  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3300  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3301  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3302  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
3303  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
3304  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
3305  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
3306  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3307main_body:
3308  %s = extractelement <2 x i16> %coords, i32 0
3309  %t = extractelement <2 x i16> %coords, i32 1
3310  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0)
3311  %data = extractvalue { <4 x float>, i32 } %v, 0
3312  %tfe = extractvalue { <4 x float>, i32 } %v, 1
3313  store i32 %tfe, i32 addrspace(1)* undef
3314  ret <4 x float> %data
3315}
3316
3317define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
3318  ; GFX9-LABEL: name: load_3d_tfe
3319  ; GFX9: bb.1.main_body:
3320  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
3321  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3322  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3323  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3324  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3325  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3326  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3327  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3328  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3329  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3330  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
3331  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3332  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3333  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3334  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3335  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3336  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3337  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
3338  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
3339  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
3340  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
3341  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
3342  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
3343  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
3344  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
3345  ; GFX9:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
3346  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32)
3347  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
3348  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3349  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3350  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3351  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
3352  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
3353  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
3354  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
3355  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3356  ; GFX10NSA-LABEL: name: load_3d_tfe
3357  ; GFX10NSA: bb.1.main_body:
3358  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
3359  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3360  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3361  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3362  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3363  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3364  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3365  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3366  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3367  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3368  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
3369  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3370  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3371  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3372  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3373  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3374  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3375  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
3376  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
3377  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
3378  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
3379  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
3380  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
3381  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
3382  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
3383  ; GFX10NSA:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
3384  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32)
3385  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
3386  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3387  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3388  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3389  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
3390  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
3391  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
3392  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
3393  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3394main_body:
3395  %s = extractelement <2 x i16> %coords_lo, i32 0
3396  %t = extractelement <2 x i16> %coords_lo, i32 1
3397  %r = extractelement <2 x i16> %coords_hi, i32 0
3398  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0)
3399  %data = extractvalue { <4 x float>, i32 } %v, 0
3400  %tfe = extractvalue { <4 x float>, i32 } %v, 1
3401  store i32 %tfe, i32 addrspace(1)* undef
3402  ret <4 x float> %data
3403}
3404
3405define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
3406  ; GFX9-LABEL: name: load_2darraymsaa_tfe
3407  ; GFX9: bb.1.main_body:
3408  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
3409  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3410  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3411  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3412  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3413  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3414  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3415  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3416  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3417  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3418  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
3419  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3420  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3421  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3422  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3423  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3424  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3425  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
3426  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
3427  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
3428  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
3429  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
3430  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
3431  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
3432  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
3433  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
3434  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
3435  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
3436  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
3437  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
3438  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3439  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3440  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3441  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
3442  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
3443  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
3444  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
3445  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3446  ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe
3447  ; GFX10NSA: bb.1.main_body:
3448  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
3449  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
3450  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
3451  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
3452  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
3453  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
3454  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
3455  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
3456  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
3457  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
3458  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
3459  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
3460  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
3461  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
3462  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3463  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
3464  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
3465  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
3466  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
3467  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
3468  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
3469  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
3470  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
3471  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
3472  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
3473  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
3474  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
3475  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
3476  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
3477  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
3478  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
3479  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
3480  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
3481  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
3482  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
3483  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
3484  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
3485  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
3486main_body:
3487  %s = extractelement <2 x i16> %coords_lo, i32 0
3488  %t = extractelement <2 x i16> %coords_lo, i32 1
3489  %slice = extractelement <2 x i16> %coords_hi, i32 0
3490  %fragid = extractelement <2 x i16> %coords_hi, i32 1
3491  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
3492  %data = extractvalue { <4 x float>, i32 } %v, 0
3493  %tfe = extractvalue { <4 x float>, i32 } %v, 1
3494  store i32 %tfe, i32 addrspace(1)* undef
3495  ret <4 x float> %data
3496}
3497
3498declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3499declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3500declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3501declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3502declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3503declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3504declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3505declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3506declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3507declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3508declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3509declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3510declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3511declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3512declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3513declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3514declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3515declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3516declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3517declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3518declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3519declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3520declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3521declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3522declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3523declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3524declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3525declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3526declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3527declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3528declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3529declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3530declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3531declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3532declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3533declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
3534declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3535declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3536declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3537declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3538declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
3539declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3540declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3541declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3542declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
3543
3544attributes #0 = { nounwind }
3545attributes #1 = { nounwind readonly }
3546attributes #2 = { nounwind writeonly }
3547attributes #3 = { nounwind readnone }
3548