; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
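; Legalizer tests for the llvm.amdgcn.image.load.* intrinsics with 16-bit
; (i16) coordinates: the coordinates are packed into <2 x s16> vectors
; (padded with G_IMPLICIT_DEF where the count is odd) before forming
; G_AMDGPU_INTRIN_IMAGE_LOAD, on both gfx900 and gfx1010.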
4
5define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
6  ; GFX9-LABEL: name: load_1d
7  ; GFX9: bb.1.main_body:
8  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
9  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
10  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
11  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
12  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
13  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
14  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
15  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
16  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
17  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
18  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
19  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
20  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
21  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
22  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
23  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
24  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
25  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
26  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
27  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
28  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
29  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
30  ; GFX10NSA-LABEL: name: load_1d
31  ; GFX10NSA: bb.1.main_body:
32  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
33  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
34  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
35  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
36  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
37  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
38  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
39  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
40  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
41  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
42  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
43  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
44  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
45  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
46  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
47  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
48  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
49  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
50  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
51  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
52  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
53  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
54main_body:
55  %s = extractelement <2 x i16> %coords, i32 0
56  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
57  ret <4 x float> %v
58}
59
60define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
61  ; GFX9-LABEL: name: load_2d
62  ; GFX9: bb.1.main_body:
63  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
64  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
65  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
66  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
67  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
68  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
69  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
70  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
71  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
72  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
73  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
74  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
75  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
76  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
77  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
78  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
79  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
80  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
81  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
82  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
83  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
84  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
85  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
86  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
87  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
88  ; GFX10NSA-LABEL: name: load_2d
89  ; GFX10NSA: bb.1.main_body:
90  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
91  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
92  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
93  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
94  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
95  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
96  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
97  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
98  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
99  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
100  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
101  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
102  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
103  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
104  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
105  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
106  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
107  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
108  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
109  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
110  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
111  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
112  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
113  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
114  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
115main_body:
116  %s = extractelement <2 x i16> %coords, i32 0
117  %t = extractelement <2 x i16> %coords, i32 1
118  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
119  ret <4 x float> %v
120}
121
122define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
123  ; GFX9-LABEL: name: load_3d
124  ; GFX9: bb.1.main_body:
125  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
126  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
127  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
128  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
129  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
130  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
131  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
132  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
133  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
134  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
135  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
136  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
137  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
138  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
139  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
140  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
141  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
142  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
143  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
144  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
145  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
146  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
147  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
148  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
149  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
150  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
151  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
152  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
153  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
154  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
155  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
156  ; GFX10NSA-LABEL: name: load_3d
157  ; GFX10NSA: bb.1.main_body:
158  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
159  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
160  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
161  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
162  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
163  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
164  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
165  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
166  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
167  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
168  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
169  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
170  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
171  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
172  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
173  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
174  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
175  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
176  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
177  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
178  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
179  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
180  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
181  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
182  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
183  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
184  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
185  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
186  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
187  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
188  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
189main_body:
190  %s = extractelement <2 x i16> %coords_lo, i32 0
191  %t = extractelement <2 x i16> %coords_lo, i32 1
192  %r = extractelement <2 x i16> %coords_hi, i32 0
193  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
194  ret <4 x float> %v
195}
196
197define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
198  ; GFX9-LABEL: name: load_cube
199  ; GFX9: bb.1.main_body:
200  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
201  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
202  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
203  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
204  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
205  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
206  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
207  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
208  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
209  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
210  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
211  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
212  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
213  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
214  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
215  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
216  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
217  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
218  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
219  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
220  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
221  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
222  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
223  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
224  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
225  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
226  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
227  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
228  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
229  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
230  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
231  ; GFX10NSA-LABEL: name: load_cube
232  ; GFX10NSA: bb.1.main_body:
233  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
234  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
235  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
236  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
237  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
238  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
239  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
240  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
241  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
242  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
243  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
244  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
245  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
246  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
247  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
248  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
249  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
250  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
251  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
252  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
253  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
254  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
255  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
256  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
257  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
258  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
259  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
260  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
261  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
262  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
263  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
264main_body:
265  %s = extractelement <2 x i16> %coords_lo, i32 0
266  %t = extractelement <2 x i16> %coords_lo, i32 1
267  %slice = extractelement <2 x i16> %coords_hi, i32 0
268  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
269  ret <4 x float> %v
270}
271
272define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
273  ; GFX9-LABEL: name: load_1darray
274  ; GFX9: bb.1.main_body:
275  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
276  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
277  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
278  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
279  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
280  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
281  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
282  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
283  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
284  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
285  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
286  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
287  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
288  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
289  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
290  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
291  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
292  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
293  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
294  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
295  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
296  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
297  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
298  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
299  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
300  ; GFX10NSA-LABEL: name: load_1darray
301  ; GFX10NSA: bb.1.main_body:
302  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
303  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
304  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
305  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
306  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
307  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
308  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
309  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
310  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
311  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
312  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
313  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
314  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
315  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
316  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
317  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
318  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
319  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
320  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
321  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
322  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
323  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
324  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
325  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
326  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
327main_body:
328  %s = extractelement <2 x i16> %coords, i32 0
329  %slice = extractelement <2 x i16> %coords, i32 1
330  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
331  ret <4 x float> %v
332}
333
334define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
335  ; GFX9-LABEL: name: load_2darray
336  ; GFX9: bb.1.main_body:
337  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
338  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
339  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
340  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
341  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
342  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
343  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
344  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
345  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
346  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
347  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
348  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
349  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
350  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
351  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
352  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
353  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
354  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
355  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
356  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
357  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
358  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
359  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
360  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
361  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
362  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
363  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
364  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
365  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
366  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
367  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
368  ; GFX10NSA-LABEL: name: load_2darray
369  ; GFX10NSA: bb.1.main_body:
370  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
371  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
372  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
373  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
374  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
375  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
376  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
377  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
378  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
379  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
380  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
381  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
382  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
383  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
384  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
385  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
386  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
387  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
388  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
389  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
390  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
391  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
392  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
393  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
394  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
395  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
396  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
397  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
398  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
399  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
400  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
401main_body:
402  %s = extractelement <2 x i16> %coords_lo, i32 0
403  %t = extractelement <2 x i16> %coords_lo, i32 1
404  %slice = extractelement <2 x i16> %coords_hi, i32 0
405  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
406  ret <4 x float> %v
407}
408
409define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
410  ; GFX9-LABEL: name: load_2dmsaa
411  ; GFX9: bb.1.main_body:
412  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
413  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
414  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
415  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
416  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
417  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
418  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
419  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
420  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
421  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
422  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
423  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
424  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
425  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
426  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
427  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
428  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
429  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
430  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
431  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
432  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
433  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
434  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
435  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
436  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
437  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
438  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
439  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
440  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
441  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
442  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
443  ; GFX10NSA-LABEL: name: load_2dmsaa
444  ; GFX10NSA: bb.1.main_body:
445  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
446  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
447  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
448  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
449  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
450  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
451  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
452  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
453  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
454  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
455  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
456  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
457  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
458  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
459  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
460  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
461  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
462  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
463  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
464  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
465  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
466  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
467  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
468  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
469  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
470  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
471  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
472  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
473  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
474  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
475  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
476main_body:
477  %s = extractelement <2 x i16> %coords_lo, i32 0
478  %t = extractelement <2 x i16> %coords_lo, i32 1
479  %fragid = extractelement <2 x i16> %coords_hi, i32 0
480  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
481  ret <4 x float> %v
482}
483
484define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
485  ; GFX9-LABEL: name: load_2darraymsaa
486  ; GFX9: bb.1.main_body:
487  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
488  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
489  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
490  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
491  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
492  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
493  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
494  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
495  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
496  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
497  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
498  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
499  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
500  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
501  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
502  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
503  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
504  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
505  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
506  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
507  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
508  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
509  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
510  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
511  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
512  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
513  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
514  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
515  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
516  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
517  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
518  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
519  ; GFX10NSA-LABEL: name: load_2darraymsaa
520  ; GFX10NSA: bb.1.main_body:
521  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
522  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
523  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
524  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
525  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
526  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
527  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
528  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
529  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
530  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
531  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
532  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
533  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
534  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
535  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
536  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
537  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
538  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
539  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
540  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
541  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
542  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
543  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
544  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
545  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
546  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
547  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
548  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
549  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
550  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
551  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
552  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
553main_body:
554  %s = extractelement <2 x i16> %coords_lo, i32 0
555  %t = extractelement <2 x i16> %coords_lo, i32 1
556  %slice = extractelement <2 x i16> %coords_hi, i32 0
557  %fragid = extractelement <2 x i16> %coords_hi, i32 1
558  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
559  ret <4 x float> %v
560}
561
562define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
563  ; GFX9-LABEL: name: load_mip_1d
564  ; GFX9: bb.1.main_body:
565  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
566  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
567  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
568  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
569  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
570  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
571  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
572  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
573  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
574  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
575  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
576  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
577  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
578  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
579  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
580  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
581  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
582  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
583  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
584  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
585  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
586  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
587  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
588  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
589  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
590  ; GFX10NSA-LABEL: name: load_mip_1d
591  ; GFX10NSA: bb.1.main_body:
592  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
593  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
594  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
595  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
596  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
597  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
598  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
599  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
600  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
601  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
602  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
603  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
604  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
605  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
606  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
607  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
608  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
609  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
610  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
611  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
612  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
613  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
614  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
615  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
616  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
617main_body:
618  %s = extractelement <2 x i16> %coords, i32 0
619  %mip = extractelement <2 x i16> %coords, i32 1
620  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
621  ret <4 x float> %v
622}
623
624define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
625  ; GFX9-LABEL: name: load_mip_2d
626  ; GFX9: bb.1.main_body:
627  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
628  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
629  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
630  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
631  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
632  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
633  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
634  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
635  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
636  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
637  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
638  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
639  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
640  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
641  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
642  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
643  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
644  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
645  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
646  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
647  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
648  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
649  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
650  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
651  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
652  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
653  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
654  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
655  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
656  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
657  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
658  ; GFX10NSA-LABEL: name: load_mip_2d
659  ; GFX10NSA: bb.1.main_body:
660  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
661  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
662  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
663  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
664  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
665  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
666  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
667  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
668  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
669  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
670  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
671  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
672  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
673  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
674  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
675  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
676  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
677  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
678  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
679  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
680  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
681  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
682  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
683  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
684  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
685  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
686  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
687  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
688  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
689  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
690  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
691main_body:
692  %s = extractelement <2 x i16> %coords_lo, i32 0
693  %t = extractelement <2 x i16> %coords_lo, i32 1
694  %mip = extractelement <2 x i16> %coords_hi, i32 0
695  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
696  ret <4 x float> %v
697}
698
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_mip_3d
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_mip_3d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_mip_cube
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_mip_cube
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

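; load.mip.1darray has an odd coordinate count (s, slice, mip), so the unused
; high lane of the second <2 x s16> is expected to be padded with a
; G_IMPLICIT_DEF value before the G_CONCAT_VECTORS, as checked below.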
define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_mip_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_mip_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %slice = extractelement <2 x i16> %coords_lo, i32 1
  %mip = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_mip_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_mip_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

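; The store tests mirror the load legalization: the <4 x float> vdata arrives
; in $vgpr0-$vgpr3 and is rebuilt with G_BUILD_VECTOR, while the i16
; coordinates are packed the same way and passed to G_AMDGPU_INTRIN_IMAGE_STORE.
; A minimal a16 store call has the form, e.g.:
;   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)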
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

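; With a16 coordinates the packed vector takes the first address-operand slot;
; the remaining per-coordinate slots appear as $noreg placeholders (one per
; leftover coordinate) in the checks that follow.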
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_2d
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_3d
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_3d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_cube
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_cube
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %slice = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: store_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_2dmsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %fragid = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

1487define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1488  ; GFX9-LABEL: name: store_2darraymsaa
1489  ; GFX9: bb.1.main_body:
1490  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1491  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1492  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1493  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1494  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1495  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1496  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1497  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1498  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1499  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1500  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1501  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1502  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1503  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1504  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1505  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1506  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1507  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1508  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1509  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1510  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1511  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1512  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1513  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1514  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1515  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1516  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1517  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1518  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1519  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1520  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1521  ; GFX9:   S_ENDPGM 0
1522  ; GFX10NSA-LABEL: name: store_2darraymsaa
1523  ; GFX10NSA: bb.1.main_body:
1524  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1525  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1526  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1527  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1528  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1529  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1530  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1531  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1532  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1533  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1534  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1535  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1536  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1537  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1538  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1539  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1540  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1541  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1542  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1543  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1544  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1545  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1546  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1547  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1548  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1549  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1550  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1551  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1552  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1553  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1554  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1555  ; GFX10NSA:   S_ENDPGM 0
1556main_body:
1557  %s = extractelement <2 x i16> %coords_lo, i32 0
1558  %t = extractelement <2 x i16> %coords_lo, i32 1
1559  %slice = extractelement <2 x i16> %coords_hi, i32 0
1560  %fragid = extractelement <2 x i16> %coords_hi, i32 1
1561  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1562  ret void
1563}
1564
1565define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
1566  ; GFX9-LABEL: name: store_mip_1d
1567  ; GFX9: bb.1.main_body:
1568  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1569  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1570  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1571  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1572  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1573  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1574  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1575  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1576  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1577  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1578  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1579  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1580  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1581  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1582  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1583  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1584  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1585  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1586  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1587  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1588  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1589  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1590  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1591  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1592  ; GFX9:   S_ENDPGM 0
1593  ; GFX10NSA-LABEL: name: store_mip_1d
1594  ; GFX10NSA: bb.1.main_body:
1595  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1596  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1597  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1598  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1599  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1600  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1601  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1602  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1603  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1604  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1605  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1606  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1607  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1608  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1609  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1610  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1611  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1612  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1613  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1614  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1615  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1616  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1617  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1618  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1619  ; GFX10NSA:   S_ENDPGM 0
1620main_body:
1621  %s = extractelement <2 x i16> %coords, i32 0
1622  %mip = extractelement <2 x i16> %coords, i32 1
1623  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1624  ret void
1625}
1626
1627define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1628  ; GFX9-LABEL: name: store_mip_2d
1629  ; GFX9: bb.1.main_body:
1630  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1631  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1632  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1633  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1634  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1635  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1636  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1637  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1638  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1639  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1640  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1641  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1642  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1643  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1644  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1645  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1646  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1647  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1648  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1649  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1650  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1651  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1652  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1653  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1654  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1655  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1656  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1657  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
1658  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1659  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1660  ; GFX9:   S_ENDPGM 0
1661  ; GFX10NSA-LABEL: name: store_mip_2d
1662  ; GFX10NSA: bb.1.main_body:
1663  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1664  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1665  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1666  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1667  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1668  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1669  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1670  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1671  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1672  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1673  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1674  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1675  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1676  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1677  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1678  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1679  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1680  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1681  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1682  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1683  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1684  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1685  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1686  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1687  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1688  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1689  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1690  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
1691  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1692  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1693  ; GFX10NSA:   S_ENDPGM 0
1694main_body:
1695  %s = extractelement <2 x i16> %coords_lo, i32 0
1696  %t = extractelement <2 x i16> %coords_lo, i32 1
1697  %mip = extractelement <2 x i16> %coords_hi, i32 0
1698  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1699  ret void
1700}
1701
1702define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1703  ; GFX9-LABEL: name: store_mip_3d
1704  ; GFX9: bb.1.main_body:
1705  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1706  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1707  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1708  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1709  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1710  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1711  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1712  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1713  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1714  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1715  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1716  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1717  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1718  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1719  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1720  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1721  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1722  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1723  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1724  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1725  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1726  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1727  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1728  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1729  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1730  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1731  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1732  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1733  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1734  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1735  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1736  ; GFX9:   S_ENDPGM 0
1737  ; GFX10NSA-LABEL: name: store_mip_3d
1738  ; GFX10NSA: bb.1.main_body:
1739  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1740  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1741  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1742  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1743  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1744  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1745  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1746  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1747  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1748  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1749  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1750  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1751  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1752  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1753  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1754  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1755  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1756  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1757  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1758  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1759  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1760  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1761  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1762  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1763  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1764  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1765  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1766  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1767  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1768  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1769  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1770  ; GFX10NSA:   S_ENDPGM 0
1771main_body:
1772  %s = extractelement <2 x i16> %coords_lo, i32 0
1773  %t = extractelement <2 x i16> %coords_lo, i32 1
1774  %r = extractelement <2 x i16> %coords_hi, i32 0
1775  %mip = extractelement <2 x i16> %coords_hi, i32 1
1776  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1777  ret void
1778}
1779
1780define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1781  ; GFX9-LABEL: name: store_mip_cube
1782  ; GFX9: bb.1.main_body:
1783  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1784  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1785  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1786  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1787  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1788  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1789  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1790  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1791  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1792  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1793  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1794  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1795  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1796  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1797  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1798  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1799  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1800  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1801  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1802  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1803  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1804  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1805  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1806  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1807  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1808  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1809  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1810  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1811  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1812  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1813  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1814  ; GFX9:   S_ENDPGM 0
1815  ; GFX10NSA-LABEL: name: store_mip_cube
1816  ; GFX10NSA: bb.1.main_body:
1817  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1818  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1819  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1820  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1821  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1822  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1823  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1824  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1825  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1826  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1827  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1828  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1829  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1830  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1831  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1832  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1833  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1834  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1835  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1836  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1837  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1838  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1839  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1840  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1841  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1842  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1843  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1844  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1845  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1846  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1847  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1848  ; GFX10NSA:   S_ENDPGM 0
1849main_body:
1850  %s = extractelement <2 x i16> %coords_lo, i32 0
1851  %t = extractelement <2 x i16> %coords_lo, i32 1
1852  %slice = extractelement <2 x i16> %coords_hi, i32 0
1853  %mip = extractelement <2 x i16> %coords_hi, i32 1
1854  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1855  ret void
1856}
1857
1858define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1859  ; GFX9-LABEL: name: store_mip_1darray
1860  ; GFX9: bb.1.main_body:
1861  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1862  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1863  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1864  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1865  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1866  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1867  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1868  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1869  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1870  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1871  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1872  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1873  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1874  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1875  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1876  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1877  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1878  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1879  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1880  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1881  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1882  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1883  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1884  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1885  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1886  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1887  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1888  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
1889  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1890  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1891  ; GFX9:   S_ENDPGM 0
1892  ; GFX10NSA-LABEL: name: store_mip_1darray
1893  ; GFX10NSA: bb.1.main_body:
1894  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1895  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1896  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1897  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1898  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1899  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1900  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1901  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1902  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1903  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1904  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1905  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1906  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1907  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1908  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1909  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1910  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1911  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1912  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1913  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1914  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1915  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1916  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1917  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1918  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1919  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1920  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
1921  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
1922  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1923  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1924  ; GFX10NSA:   S_ENDPGM 0
1925main_body:
1926  %s = extractelement <2 x i16> %coords_lo, i32 0
1927  %slice = extractelement <2 x i16> %coords_lo, i32 1
1928  %mip = extractelement <2 x i16> %coords_hi, i32 0
1929  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
1930  ret void
1931}
1932
1933define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1934  ; GFX9-LABEL: name: store_mip_2darray
1935  ; GFX9: bb.1.main_body:
1936  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1937  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1938  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1939  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1940  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1941  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1942  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1943  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1944  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1945  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1946  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1947  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1948  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1949  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1950  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1951  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1952  ; GFX9:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1953  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1954  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1955  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1956  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1957  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1958  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1959  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1960  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1961  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1962  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1963  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1964  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1965  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
1966  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
1967  ; GFX9:   S_ENDPGM 0
1968  ; GFX10NSA-LABEL: name: store_mip_2darray
1969  ; GFX10NSA: bb.1.main_body:
1970  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1971  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1972  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1973  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1974  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1975  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1976  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1977  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1978  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1979  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1980  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1981  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1982  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
1983  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
1984  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
1985  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
1986  ; GFX10NSA:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
1987  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1988  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1989  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
1990  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
1991  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1992  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
1993  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1994  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
1995  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
1996  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
1997  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
1998  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
1999  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
2000  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
2001  ; GFX10NSA:   S_ENDPGM 0
2002main_body:
2003  %s = extractelement <2 x i16> %coords_lo, i32 0
2004  %t = extractelement <2 x i16> %coords_lo, i32 1
2005  %slice = extractelement <2 x i16> %coords_hi, i32 0
2006  %mip = extractelement <2 x i16> %coords_hi, i32 1
2007  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2008  ret void
2009}
2010
2011define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2012  ; GFX9-LABEL: name: getresinfo_1d
2013  ; GFX9: bb.1.main_body:
2014  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2015  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2016  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2017  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2018  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2019  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2020  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2021  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2022  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2023  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2024  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2025  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2026  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2027  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2028  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2029  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2030  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2031  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2032  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2033  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2034  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2035  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2036  ; GFX10NSA-LABEL: name: getresinfo_1d
2037  ; GFX10NSA: bb.1.main_body:
2038  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2039  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2040  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2041  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2042  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2043  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2044  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2045  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2046  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2047  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2048  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2049  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2050  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2051  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2052  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2053  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2054  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2055  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2056  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2057  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2058  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2059  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2060main_body:
2061  %mip = extractelement <2 x i16> %coords, i32 0
2062  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2063  ret <4 x float> %v
2064}
2065
2066define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2067  ; GFX9-LABEL: name: getresinfo_2d
2068  ; GFX9: bb.1.main_body:
2069  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2070  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2071  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2072  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2073  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2074  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2075  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2076  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2077  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2078  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2079  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2080  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2081  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2082  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2083  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2084  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2085  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2086  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2087  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2088  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2089  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2090  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2091  ; GFX10NSA-LABEL: name: getresinfo_2d
2092  ; GFX10NSA: bb.1.main_body:
2093  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2094  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2095  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2096  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2097  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2098  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2099  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2100  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2101  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2102  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2103  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2104  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2105  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2106  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2107  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2108  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2109  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2110  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2111  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2112  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2113  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2114  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2115main_body:
2116  %mip = extractelement <2 x i16> %coords, i32 0
2117  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2118  ret <4 x float> %v
2119}
2120
2121define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
2122  ; GFX9-LABEL: name: getresinfo_3d
2123  ; GFX9: bb.1.main_body:
2124  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2125  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2126  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2127  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2128  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2129  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2130  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2131  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2132  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2133  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2134  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2135  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2136  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2137  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2138  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2139  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2140  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2141  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
2142  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
2143  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
2144  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
2145  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2146  ; GFX10NSA-LABEL: name: getresinfo_3d
2147  ; GFX10NSA: bb.1.main_body:
2148  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
2149  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
2150  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
2151  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
2152  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
2153  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
2154  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
2155  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
2156  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
2157  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
2158  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
2159  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
2160  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
2161  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
2162  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
2163  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
2164  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
2165  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
2166  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
2167  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
2168  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
2169  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2170main_body:
2171  %mip = extractelement <2 x i16> %coords, i32 0
2172  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
2173  ret <4 x float> %v
2174}
2175
define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_cube
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_cube
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_2dmsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_2darraymsaa
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

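; dmask selects one result channel per set bit: i32 8 below yields a scalar
; s32 load, i32 9 (two bits set) a <2 x s32> load.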
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_V1
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
  ; GFX9:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: load_1d_V1
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
  ; GFX10NSA:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_V2
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; GFX10NSA-LABEL: name: load_1d_V2
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

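; Stores mirror the loads: the number of dmask bits matches the width of the
; data operand (one s32 for i32 2, a <2 x s32> build vector for i32 12).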
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_V1
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_V1
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_V2
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_V2
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

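; The trailing i32 argument of the intrinsic is the cachepolicy: bit 0 = glc,
; bit 1 = slc (so glc=1, slc=2, glc|slc=3). It is carried through unchanged as
; an immediate on the legalized instruction.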
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_glc
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_glc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_slc
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_glc_slc
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_glc_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

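; The same cachepolicy encoding applies to stores: glc=1, slc=2, glc|slc=3.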
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_glc
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_glc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_slc
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: store_1d_glc_slc
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX9:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX9:   S_ENDPGM 0
  ; GFX10NSA-LABEL: name: store_1d_glc_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10NSA:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
  ; GFX10NSA:   S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

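; With dmask=0 nothing is read, so the legalizer folds the intrinsic away
; entirely and returns a G_IMPLICIT_DEF instead of emitting an image
; instruction.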
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
  ; GFX9-LABEL: name: getresinfo_dmask0
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: getresinfo_dmask0
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}

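; TFE (texfailctrl bit 0, the immediate 1 below) appends a status dword, so
; the load legalizes to <5 x s32>; the fifth element is split off by
; G_UNMERGE_VALUES and stored out separately.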
define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_1d_tfe
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %data
}

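; 2d variant of the TFE test: the two a16 coordinates are repacked into a
; single <2 x s16> vector operand, and the unused address slot is filled with
; $noreg.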
define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_2d_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_2d_tfe
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %data
}

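; 3d variant: three a16 coordinates do not fill a whole number of <2 x s16>
; registers, so the odd lane is padded with an implicit_def before the halves
; are concatenated into <4 x s16>.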
define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_3d_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF1]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_3d_tfe
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF1]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %data
}

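; 2darraymsaa variant: the four a16 coordinates (s, t, slice, fragid) pack
; exactly into two <2 x s16> registers, which are concatenated into a
; <4 x s16> address operand.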
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_2darraymsaa_tfe
  ; GFX9: bb.1.main_body:
  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX9:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX9:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GFX10NSA:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
  ; GFX10NSA:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[LSHR3]](s32)
  ; GFX10NSA:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
  ; GFX10NSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GFX10NSA:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
  ; GFX10NSA:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %fragid = extractelement <2 x i16> %coords_hi, i32 1
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, i32 addrspace(1)* undef
  ret <4 x float> %data
}

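; Declarations for the i16 (a16) image intrinsic variants exercised by the
; tests in this file.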
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind writeonly }
attributes #3 = { nounwind readnone }