1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
5
6; FIXME: Merge with regbankselect, which mostly overlaps when all types supported.
7
8; Natural mapping
; Scalar i32 s.buffer.load with cachepolicy 0 ("natural mapping"): all three
; targets select S_BUFFER_LOAD_DWORD_SGPR with glc=0, then readfirstlane the
; result back to an SGPR for the amdgpu_ps return in $sgpr0.
define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret i32 %val
}
55
; Same as s_buffer_load_i32 but with cachepolicy 1 (glc): the selected
; S_BUFFER_LOAD_DWORD_SGPR carries cpol operand 1 instead of 0.
define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_i32_glc
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_glc
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_glc
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
  ret i32 %val
}
102
; <2 x i32> load: selects S_BUFFER_LOAD_DWORDX2_SGPR, then unpacks sub0/sub1
; and readfirstlanes each element into $sgpr0/$sgpr1 for the return.
define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v2i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX7-LABEL: name: s_buffer_load_v2i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX8-LABEL: name: s_buffer_load_v2i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <2 x i32> %val
}
164
; <3 x i32> load: there is no 96-bit SMEM load, so selection widens to
; S_BUFFER_LOAD_DWORDX4_SGPR and extracts the sub0_sub1_sub2 triple before
; readfirstlaning each element into $sgpr0..$sgpr2.
define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v3i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
  ; GFX6:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
  ; GFX6:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX6:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  ; GFX7-LABEL: name: s_buffer_load_v3i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
  ; GFX7:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
  ; GFX7:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX7:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  ; GFX8-LABEL: name: s_buffer_load_v3i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
  ; GFX8:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
  ; GFX8:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX8:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <3 x i32> %val
}
241
; <8 x i32> load: single S_BUFFER_LOAD_DWORDX8_SGPR into an sgpr_256, then
; each of sub0..sub7 is readfirstlaned into $sgpr0..$sgpr7 for the return.
define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v8i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX6:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX6:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX6:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX6:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX6:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX6:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX6:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX6:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX6:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX6:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX6:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX6:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX6:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX6:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX7:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX7:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX7:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX7:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX7:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX7:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX7:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX7:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX7:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX7:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX7:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX7:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX7:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX7:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX8:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX8:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX8:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX8:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX8:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX8:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX8:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX8:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX8:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX8:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX8:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX8:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX8:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX8:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x i32> %val
}
375
376define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
377  ; GFX6-LABEL: name: s_buffer_load_v16i32
378  ; GFX6: bb.1 (%ir-block.0):
379  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
380  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
381  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
382  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
383  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
384  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
385  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
386  ; GFX6:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
387  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
388  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
389  ; GFX6:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
390  ; GFX6:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
391  ; GFX6:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
392  ; GFX6:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
393  ; GFX6:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
394  ; GFX6:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
395  ; GFX6:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
396  ; GFX6:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
397  ; GFX6:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
398  ; GFX6:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
399  ; GFX6:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
400  ; GFX6:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
401  ; GFX6:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
402  ; GFX6:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
403  ; GFX6:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
404  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
405  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
406  ; GFX6:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
407  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
408  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
409  ; GFX6:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
410  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
411  ; GFX6:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
412  ; GFX6:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
413  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
414  ; GFX6:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
415  ; GFX6:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
416  ; GFX6:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
417  ; GFX6:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
418  ; GFX6:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
419  ; GFX6:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
420  ; GFX6:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
421  ; GFX6:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
422  ; GFX6:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
423  ; GFX6:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
424  ; GFX6:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
425  ; GFX6:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
426  ; GFX6:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
427  ; GFX6:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
428  ; GFX6:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
429  ; GFX6:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
430  ; GFX6:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
431  ; GFX6:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
432  ; GFX6:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
433  ; GFX6:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
434  ; GFX6:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
435  ; GFX6:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
436  ; GFX6:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
437  ; GFX6:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
438  ; GFX6:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
439  ; GFX6:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
440  ; GFX6:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
441  ; GFX6:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
442  ; GFX6:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
443  ; GFX6:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
444  ; GFX6:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
445  ; GFX6:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
446  ; GFX6:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
447  ; GFX6:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
448  ; GFX6:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
449  ; GFX6:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
450  ; GFX6:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
451  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
452  ; GFX7-LABEL: name: s_buffer_load_v16i32
453  ; GFX7: bb.1 (%ir-block.0):
454  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
455  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
456  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
457  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
458  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
459  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
460  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
461  ; GFX7:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
462  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
463  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
464  ; GFX7:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
465  ; GFX7:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
466  ; GFX7:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
467  ; GFX7:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
468  ; GFX7:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
469  ; GFX7:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
470  ; GFX7:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
471  ; GFX7:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
472  ; GFX7:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
473  ; GFX7:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
474  ; GFX7:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
475  ; GFX7:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
476  ; GFX7:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
477  ; GFX7:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
478  ; GFX7:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
479  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
480  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
481  ; GFX7:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
482  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
483  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
484  ; GFX7:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
485  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
486  ; GFX7:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
487  ; GFX7:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
488  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
489  ; GFX7:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
490  ; GFX7:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
491  ; GFX7:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
492  ; GFX7:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
493  ; GFX7:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
494  ; GFX7:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
495  ; GFX7:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
496  ; GFX7:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
497  ; GFX7:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
498  ; GFX7:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
499  ; GFX7:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
500  ; GFX7:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
501  ; GFX7:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
502  ; GFX7:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
503  ; GFX7:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
504  ; GFX7:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
505  ; GFX7:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
506  ; GFX7:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
507  ; GFX7:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
508  ; GFX7:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
509  ; GFX7:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
510  ; GFX7:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
511  ; GFX7:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
512  ; GFX7:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
513  ; GFX7:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
514  ; GFX7:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
515  ; GFX7:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
516  ; GFX7:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
517  ; GFX7:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
518  ; GFX7:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
519  ; GFX7:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
520  ; GFX7:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
521  ; GFX7:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
522  ; GFX7:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
523  ; GFX7:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
524  ; GFX7:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
525  ; GFX7:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
526  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
527  ; GFX8-LABEL: name: s_buffer_load_v16i32
528  ; GFX8: bb.1 (%ir-block.0):
529  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
530  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
531  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
532  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
533  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
534  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
535  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
536  ; GFX8:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
537  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
538  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
539  ; GFX8:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
540  ; GFX8:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
541  ; GFX8:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
542  ; GFX8:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
543  ; GFX8:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
544  ; GFX8:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
545  ; GFX8:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
546  ; GFX8:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
547  ; GFX8:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
548  ; GFX8:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
549  ; GFX8:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
550  ; GFX8:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
551  ; GFX8:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
552  ; GFX8:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
553  ; GFX8:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
554  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
555  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
556  ; GFX8:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
557  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
558  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
559  ; GFX8:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
560  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
561  ; GFX8:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
562  ; GFX8:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
563  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
564  ; GFX8:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
565  ; GFX8:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
566  ; GFX8:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
567  ; GFX8:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
568  ; GFX8:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
569  ; GFX8:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
570  ; GFX8:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
571  ; GFX8:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
572  ; GFX8:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
573  ; GFX8:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
574  ; GFX8:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
575  ; GFX8:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
576  ; GFX8:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
577  ; GFX8:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
578  ; GFX8:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
579  ; GFX8:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
580  ; GFX8:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
581  ; GFX8:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
582  ; GFX8:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
583  ; GFX8:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
584  ; GFX8:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
585  ; GFX8:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
586  ; GFX8:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
587  ; GFX8:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
588  ; GFX8:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
589  ; GFX8:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
590  ; GFX8:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
591  ; GFX8:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
592  ; GFX8:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
593  ; GFX8:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
594  ; GFX8:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
595  ; GFX8:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
596  ; GFX8:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
597  ; GFX8:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
598  ; GFX8:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
599  ; GFX8:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
600  ; GFX8:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
601  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
602  %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
603  ret <16 x i32> %val
604}
605
; Offset 1 is not a multiple of 4, so GFX6/GFX7 (whose SMRD immediate is
; dword-granular) cannot fold it into the immediate field: they materialize
; the offset with S_MOV_B32 and select the _SGPR form. GFX8 uses a
; byte-granular immediate and folds it directly into S_BUFFER_LOAD_DWORD_IMM.
606define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
607  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1
608  ; GFX6: bb.1 (%ir-block.0):
609  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
610  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
611  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
612  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
613  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
614  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
615  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
616  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
617  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
618  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
619  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
620  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
621  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1
622  ; GFX7: bb.1 (%ir-block.0):
623  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
624  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
625  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
626  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
627  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
628  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
629  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
630  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
631  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
632  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
633  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
634  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
635  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1
636  ; GFX8: bb.1 (%ir-block.0):
637  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
638  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
639  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
640  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
641  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
642  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
643  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0, 0 :: (dereferenceable invariant load 4)
644  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
645  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
646  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
647  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
648  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
649  ret i32 %val
650}
651
; The glc bit (cachepolicy argument = 1) is carried through as the third
; operand of the selected instruction on all targets. The 4-byte offset is
; dword-aligned, so every target uses the IMM form: GFX6/GFX7 encode it as
; 1 (dword-granular immediate, 4/4), GFX8 encodes the byte offset 4 directly.
652define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
653  ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
654  ; GFX6: bb.1 (%ir-block.0):
655  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
656  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
657  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
658  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
659  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
660  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
661  ; GFX6:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
662  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
663  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
664  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
665  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
666  ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
667  ; GFX7: bb.1 (%ir-block.0):
668  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
669  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
670  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
671  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
672  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
673  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
674  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
675  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
676  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
677  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
678  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
679  ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
680  ; GFX8: bb.1 (%ir-block.0):
681  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
682  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
683  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
684  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
685  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
686  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
687  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1, 0 :: (dereferenceable invariant load 4)
688  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
689  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
690  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
691  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
692  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
693  ret i32 %val
694}
695
; Offset 255 is not dword-aligned, so GFX6/GFX7 cannot represent it in their
; dword-granular immediate field and materialize it via S_MOV_B32 + the _SGPR
; form; GFX8's byte-granular immediate folds 255 directly into the _IMM form.
696define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
697  ; GFX6-LABEL: name: s_buffer_load_i32_offset_255
698  ; GFX6: bb.1 (%ir-block.0):
699  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
700  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
701  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
702  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
703  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
704  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
705  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
706  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
707  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
708  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
709  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
710  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
711  ; GFX7-LABEL: name: s_buffer_load_i32_offset_255
712  ; GFX7: bb.1 (%ir-block.0):
713  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
714  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
715  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
716  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
717  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
718  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
719  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
720  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
721  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
722  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
723  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
724  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
725  ; GFX8-LABEL: name: s_buffer_load_i32_offset_255
726  ; GFX8: bb.1 (%ir-block.0):
727  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
728  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
729  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
730  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
731  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
732  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
733  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
734  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
735  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
736  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
737  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
738  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
739  ret i32 %val
740}
741
; Offset 256 is dword-aligned and in range for all targets, so every target
; selects the _IMM form: GFX6/GFX7 encode it as 64 (256 bytes / 4 = 64 dwords),
; GFX8 encodes the raw byte offset 256.
742define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
743  ; GFX6-LABEL: name: s_buffer_load_i32_offset_256
744  ; GFX6: bb.1 (%ir-block.0):
745  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
746  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
747  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
748  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
749  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
750  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
751  ; GFX6:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
752  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
753  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
754  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
755  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
756  ; GFX7-LABEL: name: s_buffer_load_i32_offset_256
757  ; GFX7: bb.1 (%ir-block.0):
758  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
759  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
760  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
761  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
762  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
763  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
764  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
765  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
766  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
767  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
768  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
769  ; GFX8-LABEL: name: s_buffer_load_i32_offset_256
770  ; GFX8: bb.1 (%ir-block.0):
771  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
772  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
773  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
774  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
775  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
776  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
777  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
778  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
779  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
780  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
781  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
782  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
783  ret i32 %val
784}
785
; Offset 1020 = 255 * 4 is the largest byte offset that still fits the
; GFX6/GFX7 8-bit dword-granular immediate (encoded as 255); GFX8 encodes
; the byte offset 1020 directly in the _IMM form.
786define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
787  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
788  ; GFX6: bb.1 (%ir-block.0):
789  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
790  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
791  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
792  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
793  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
794  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
795  ; GFX6:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
796  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
797  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
798  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
799  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
800  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
801  ; GFX7: bb.1 (%ir-block.0):
802  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
803  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
804  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
805  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
806  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
807  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
808  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
809  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
810  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
811  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
812  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
813  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
814  ; GFX8: bb.1 (%ir-block.0):
815  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
816  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
817  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
818  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
819  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
820  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
821  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0, 0 :: (dereferenceable invariant load 4)
822  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
823  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
824  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
825  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
826  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
827  ret i32 %val
828}
829
; Offset 1023 is not a multiple of 4, so GFX6/GFX7 cannot use their
; dword-granular immediate and fall back to S_MOV_B32 + the _SGPR form;
; GFX8's byte-granular immediate encodes 1023 directly in the _IMM form.
830define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
831  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
832  ; GFX6: bb.1 (%ir-block.0):
833  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
834  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
835  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
836  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
837  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
838  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
839  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
840  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
841  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
842  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
843  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
844  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
845  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
846  ; GFX7: bb.1 (%ir-block.0):
847  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
848  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
849  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
850  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
851  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
852  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
853  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
854  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
855  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
856  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
857  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
858  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
859  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
860  ; GFX8: bb.1 (%ir-block.0):
861  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
862  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
863  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
864  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
865  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
866  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
867  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0, 0 :: (dereferenceable invariant load 4)
868  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
869  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
870  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
871  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
872  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
873  ret i32 %val
874}
875
; Offset 1024: GFX6 has no usable immediate form here, so it materializes the
; offset with S_MOV_B32 1024 and selects the SGPR-offset variant. GFX7 selects
; S_BUFFER_LOAD_DWORD_IMM_ci with immediate 256 (= 1024 / 4, i.e. the offset
; scaled to dwords). GFX8 encodes the byte offset 1024 directly in
; S_BUFFER_LOAD_DWORD_IMM.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
  ret i32 %val
}
920
; Offset 1025: GFX6 and GFX7 both fall back to the SGPR-offset variant with
; S_MOV_B32 1025. NOTE(review): presumably GFX7 cannot use _IMM_ci here
; because 1025 is not a multiple of 4 and the _IMM_ci immediate is
; dword-scaled (see the 1024 test above) — confirm against the selector.
; GFX8 encodes the byte offset 1025 directly in S_BUFFER_LOAD_DWORD_IMM.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
  ret i32 %val
}
966
; Offset -1 (0xFFFFFFFF): all three targets materialize 4294967295 with
; S_MOV_B32 and select the SGPR-offset variant; no immediate encoding is used
; on any of them.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
  ret i32 %load
}
1013
; Offset -4 (0xFFFFFFFC): GFX6 and GFX8 materialize 4294967292 with S_MOV_B32
; and use the SGPR-offset variant. GFX7 selects S_BUFFER_LOAD_DWORD_IMM_ci
; with immediate 1073741823 (= 0xFFFFFFFC >> 2, i.e. the offset interpreted as
; an unsigned dword count).
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
  ret i32 %load
}
1059
; Offset -8 (0xFFFFFFF8): GFX6 and GFX8 materialize 4294967288 with S_MOV_B32
; and use the SGPR-offset variant. GFX7 selects S_BUFFER_LOAD_DWORD_IMM_ci
; with immediate 1073741822 (= 0xFFFFFFF8 >> 2, the unsigned dword-scaled
; offset).
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
  ret i32 %load
}
1105
; Offset with only bit 31 set (0x80000000): GFX6 and GFX8 materialize
; 2147483648 with S_MOV_B32 and use the SGPR-offset variant. GFX7 selects
; S_BUFFER_LOAD_DWORD_IMM_ci with immediate 536870912 (= 0x80000000 >> 2).
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
  ret i32 %load
}
1151
; Offset with only bit 30 set (0x40000000) plus cachepolicy 1 (glc): the
; selected loads on all three targets carry 1 in the operand position that is
; 0 in every other test here. GFX7 selects S_BUFFER_LOAD_DWORD_IMM_ci with
; immediate 268435456 (= 0x40000000 >> 2); GFX6 and GFX8 materialize
; 1073741824 with S_MOV_B32 and use the SGPR-offset variant.
define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1)
  ret i32 %load
}
1197
; Offset with only bit 29 set (0x20000000): GFX6 and GFX8 materialize
; 536870912 with S_MOV_B32 and use the SGPR-offset variant. GFX7 selects
; S_BUFFER_LOAD_DWORD_IMM_ci with immediate 134217728 (= 0x20000000 >> 2).
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
  ret i32 %load
}
1243
; Offset with only bit 21 set (0x200000 = 2097152): GFX6 and GFX8 materialize
; the value with S_MOV_B32 and use the SGPR-offset variant. GFX7 selects
; S_BUFFER_LOAD_DWORD_IMM_ci with immediate 524288 (= 2097152 / 4).
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
  ret i32 %load
}
1289
; Offset with only bit 20 set (0x100000 = 1048576): GFX6 and GFX8 materialize
; the value with S_MOV_B32 and use the SGPR-offset variant. GFX7 selects
; S_BUFFER_LOAD_DWORD_IMM_ci with immediate 262144 (= 1048576 / 4).
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
  ret i32 %load
}
1335
; soffset is the immediate constant -(1 << 20) (-1048576), which shows up
; below as its unsigned 32-bit value 4293918720. GFX6 and GFX8 materialize
; it with S_MOV_B32 and use the SGPR-offset form; GFX7 again selects the
; _IMM_ci form with the byte offset divided by 4 (4293918720 / 4 ==
; 1073479680).
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32  -1048576, i32 0)
  ret i32 %load
}
1381
; soffset is the immediate constant 1 << 19 (524288). Unlike the bit20
; case above, GFX8 can encode this directly in S_BUFFER_LOAD_DWORD_IMM as
; a byte offset. GFX7 selects the _IMM_ci form with the byte offset
; divided by 4 (524288 / 4 == 131072). GFX6 still materializes the
; constant with S_MOV_B32 and uses the SGPR-offset form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
  ret i32 %load
}
1426
; soffset is the immediate constant -(1 << 19) (-524288), appearing below
; as its unsigned 32-bit value 4294443008. GFX6 and GFX8 materialize it
; with S_MOV_B32 and use the SGPR-offset form (GFX8 cannot use the
; immediate form it used for the positive bit19 case); GFX7 selects the
; _IMM_ci form with the byte offset divided by 4 (4294443008 / 4 ==
; 1073610752).
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
  ret i32 %load
}
1472
1473; Check cases that need to be converted to MUBUF due to the offset being a VGPR.
; The soffset operand arrives in a VGPR ($vgpr0), so the scalar buffer
; load cannot be used. All three targets select the MUBUF form
; BUFFER_LOAD_DWORD_OFFEN with the VGPR as the voffset operand and a
; zero SGPR soffset materialized via S_MOV_B32 0.
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
1517
; VGPR soffset, <2 x float> result. Selected as a single
; BUFFER_LOAD_DWORDX2_OFFEN into a vreg_64, then the two 32-bit
; subregisters are copied out to the $vgpr0/$vgpr1 return registers.
define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
  ; GFX6:   $vgpr0 = COPY [[COPY5]]
  ; GFX6:   $vgpr1 = COPY [[COPY6]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
  ; GFX7:   $vgpr0 = COPY [[COPY5]]
  ; GFX7:   $vgpr1 = COPY [[COPY6]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
  ; GFX8:   $vgpr0 = COPY [[COPY5]]
  ; GFX8:   $vgpr1 = COPY [[COPY6]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <2 x float> %val
}
1570
; VGPR soffset, <3 x float> result. The 12-byte request is widened to a
; 16-byte BUFFER_LOAD_DWORDX4_OFFEN (the MMO records "invariant load 16");
; only the sub0_sub1_sub2 portion of the vreg_128 result is copied out.
; NOTE(review): the over-read is presumably legal because the memory is
; dereferenceable/invariant — confirm against the legalizer's widening
; rules if this changes.
define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2
  ; GFX6:   $vgpr0 = COPY [[COPY6]]
  ; GFX6:   $vgpr1 = COPY [[COPY7]]
  ; GFX6:   $vgpr2 = COPY [[COPY8]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2
  ; GFX7:   $vgpr0 = COPY [[COPY6]]
  ; GFX7:   $vgpr1 = COPY [[COPY7]]
  ; GFX7:   $vgpr2 = COPY [[COPY8]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2
  ; GFX8:   $vgpr0 = COPY [[COPY6]]
  ; GFX8:   $vgpr1 = COPY [[COPY7]]
  ; GFX8:   $vgpr2 = COPY [[COPY8]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <3 x float> %val
}
1632
; VGPR soffset, <4 x float> result. Selected as a single
; BUFFER_LOAD_DWORDX4_OFFEN into a vreg_128; the four 32-bit
; subregisters are copied out to the $vgpr0-$vgpr3 return registers.
define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
  ; GFX6:   $vgpr0 = COPY [[COPY5]]
  ; GFX6:   $vgpr1 = COPY [[COPY6]]
  ; GFX6:   $vgpr2 = COPY [[COPY7]]
  ; GFX6:   $vgpr3 = COPY [[COPY8]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
  ; GFX7:   $vgpr0 = COPY [[COPY5]]
  ; GFX7:   $vgpr1 = COPY [[COPY6]]
  ; GFX7:   $vgpr2 = COPY [[COPY7]]
  ; GFX7:   $vgpr3 = COPY [[COPY8]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
  ; GFX8:   $vgpr0 = COPY [[COPY5]]
  ; GFX8:   $vgpr1 = COPY [[COPY6]]
  ; GFX8:   $vgpr2 = COPY [[COPY7]]
  ; GFX8:   $vgpr3 = COPY [[COPY8]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <4 x float> %val
}
1697
1698define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1699  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1700  ; GFX6: bb.1 (%ir-block.0):
1701  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1702  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1703  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1704  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1705  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1706  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1707  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1708  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1709  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1710  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1711  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1712  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1713  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1714  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1715  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1716  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1717  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1718  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1719  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1720  ; GFX6:   $vgpr0 = COPY [[COPY5]]
1721  ; GFX6:   $vgpr1 = COPY [[COPY6]]
1722  ; GFX6:   $vgpr2 = COPY [[COPY7]]
1723  ; GFX6:   $vgpr3 = COPY [[COPY8]]
1724  ; GFX6:   $vgpr4 = COPY [[COPY9]]
1725  ; GFX6:   $vgpr5 = COPY [[COPY10]]
1726  ; GFX6:   $vgpr6 = COPY [[COPY11]]
1727  ; GFX6:   $vgpr7 = COPY [[COPY12]]
1728  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1729  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1730  ; GFX7: bb.1 (%ir-block.0):
1731  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1732  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1733  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1734  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1735  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1736  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1737  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1738  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1739  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1740  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1741  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1742  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1743  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1744  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1745  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1746  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1747  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1748  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1749  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1750  ; GFX7:   $vgpr0 = COPY [[COPY5]]
1751  ; GFX7:   $vgpr1 = COPY [[COPY6]]
1752  ; GFX7:   $vgpr2 = COPY [[COPY7]]
1753  ; GFX7:   $vgpr3 = COPY [[COPY8]]
1754  ; GFX7:   $vgpr4 = COPY [[COPY9]]
1755  ; GFX7:   $vgpr5 = COPY [[COPY10]]
1756  ; GFX7:   $vgpr6 = COPY [[COPY11]]
1757  ; GFX7:   $vgpr7 = COPY [[COPY12]]
1758  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1759  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1760  ; GFX8: bb.1 (%ir-block.0):
1761  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1762  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1763  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1764  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1765  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1766  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1767  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1768  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1769  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1770  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1771  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1772  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1773  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1774  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1775  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1776  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1777  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1778  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1779  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1780  ; GFX8:   $vgpr0 = COPY [[COPY5]]
1781  ; GFX8:   $vgpr1 = COPY [[COPY6]]
1782  ; GFX8:   $vgpr2 = COPY [[COPY7]]
1783  ; GFX8:   $vgpr3 = COPY [[COPY8]]
1784  ; GFX8:   $vgpr4 = COPY [[COPY9]]
1785  ; GFX8:   $vgpr5 = COPY [[COPY10]]
1786  ; GFX8:   $vgpr6 = COPY [[COPY11]]
1787  ; GFX8:   $vgpr7 = COPY [[COPY12]]
1788  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1789  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1790  ret <8 x float> %val
1791}
1792
; 64-byte (<16 x float>) load with a VGPR offset: legalization splits it into
; four 16-byte BUFFER_LOAD_DWORDX4_OFFEN at immediate offsets 0/16/32/48 with
; soffset = 0, recombines them into a vreg_512 REG_SEQUENCE, and returns the 16
; lanes in $vgpr0-$vgpr15. Codegen is identical across GFX6/GFX7/GFX8.
define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
  ; GFX6:   $vgpr0 = COPY [[COPY5]]
  ; GFX6:   $vgpr1 = COPY [[COPY6]]
  ; GFX6:   $vgpr2 = COPY [[COPY7]]
  ; GFX6:   $vgpr3 = COPY [[COPY8]]
  ; GFX6:   $vgpr4 = COPY [[COPY9]]
  ; GFX6:   $vgpr5 = COPY [[COPY10]]
  ; GFX6:   $vgpr6 = COPY [[COPY11]]
  ; GFX6:   $vgpr7 = COPY [[COPY12]]
  ; GFX6:   $vgpr8 = COPY [[COPY13]]
  ; GFX6:   $vgpr9 = COPY [[COPY14]]
  ; GFX6:   $vgpr10 = COPY [[COPY15]]
  ; GFX6:   $vgpr11 = COPY [[COPY16]]
  ; GFX6:   $vgpr12 = COPY [[COPY17]]
  ; GFX6:   $vgpr13 = COPY [[COPY18]]
  ; GFX6:   $vgpr14 = COPY [[COPY19]]
  ; GFX6:   $vgpr15 = COPY [[COPY20]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
  ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
  ; GFX7:   $vgpr0 = COPY [[COPY5]]
  ; GFX7:   $vgpr1 = COPY [[COPY6]]
  ; GFX7:   $vgpr2 = COPY [[COPY7]]
  ; GFX7:   $vgpr3 = COPY [[COPY8]]
  ; GFX7:   $vgpr4 = COPY [[COPY9]]
  ; GFX7:   $vgpr5 = COPY [[COPY10]]
  ; GFX7:   $vgpr6 = COPY [[COPY11]]
  ; GFX7:   $vgpr7 = COPY [[COPY12]]
  ; GFX7:   $vgpr8 = COPY [[COPY13]]
  ; GFX7:   $vgpr9 = COPY [[COPY14]]
  ; GFX7:   $vgpr10 = COPY [[COPY15]]
  ; GFX7:   $vgpr11 = COPY [[COPY16]]
  ; GFX7:   $vgpr12 = COPY [[COPY17]]
  ; GFX7:   $vgpr13 = COPY [[COPY18]]
  ; GFX7:   $vgpr14 = COPY [[COPY19]]
  ; GFX7:   $vgpr15 = COPY [[COPY20]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
  ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
  ; GFX8:   $vgpr0 = COPY [[COPY5]]
  ; GFX8:   $vgpr1 = COPY [[COPY6]]
  ; GFX8:   $vgpr2 = COPY [[COPY7]]
  ; GFX8:   $vgpr3 = COPY [[COPY8]]
  ; GFX8:   $vgpr4 = COPY [[COPY9]]
  ; GFX8:   $vgpr5 = COPY [[COPY10]]
  ; GFX8:   $vgpr6 = COPY [[COPY11]]
  ; GFX8:   $vgpr7 = COPY [[COPY12]]
  ; GFX8:   $vgpr8 = COPY [[COPY13]]
  ; GFX8:   $vgpr9 = COPY [[COPY14]]
  ; GFX8:   $vgpr10 = COPY [[COPY15]]
  ; GFX8:   $vgpr11 = COPY [[COPY16]]
  ; GFX8:   $vgpr12 = COPY [[COPY17]]
  ; GFX8:   $vgpr13 = COPY [[COPY18]]
  ; GFX8:   $vgpr14 = COPY [[COPY19]]
  ; GFX8:   $vgpr15 = COPY [[COPY20]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
  %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <16 x float> %val
}
1941
; soffset.base + constant 4092: the add is folded away and 4092 becomes the
; instruction's immediate offset on all three targets (it is below the 4095
; maximum exercised by the add_4095/add_4096 tests below), with soffset = 0.
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %soffset = add i32 %soffset.base, 4092
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
1986
; soffset.base + constant 4095: 4095 is still folded as the immediate offset
; on all three targets (contrast with add_4096 below, where folding fails),
; with soffset = 0.
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %soffset = add i32 %soffset.base, 4095
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
2031
; soffset.base + constant 4096: no longer foldable as a plain immediate.
; GFX6/GFX7 materialize 4096 into the SGPR soffset operand (immediate 0);
; GFX8 instead uses soffset = 1 combined with the maximum immediate 4095
; (1 + 4095 = 4096).
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %soffset = add i32 %soffset.base, 4096
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
2076
2077; Make sure the base offset is added to each split load.
; soffset.base + constant 4064, 32-byte load: the split halves use immediate
; offsets 4064 and 4080 (both still foldable) with soffset = 0 on all targets,
; i.e. the base constant is carried into each split DWORDX4 load.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY5]]
  ; GFX6:   $vgpr1 = COPY [[COPY6]]
  ; GFX6:   $vgpr2 = COPY [[COPY7]]
  ; GFX6:   $vgpr3 = COPY [[COPY8]]
  ; GFX6:   $vgpr4 = COPY [[COPY9]]
  ; GFX6:   $vgpr5 = COPY [[COPY10]]
  ; GFX6:   $vgpr6 = COPY [[COPY11]]
  ; GFX6:   $vgpr7 = COPY [[COPY12]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY5]]
  ; GFX7:   $vgpr1 = COPY [[COPY6]]
  ; GFX7:   $vgpr2 = COPY [[COPY7]]
  ; GFX7:   $vgpr3 = COPY [[COPY8]]
  ; GFX7:   $vgpr4 = COPY [[COPY9]]
  ; GFX7:   $vgpr5 = COPY [[COPY10]]
  ; GFX7:   $vgpr6 = COPY [[COPY11]]
  ; GFX7:   $vgpr7 = COPY [[COPY12]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY5]]
  ; GFX8:   $vgpr1 = COPY [[COPY6]]
  ; GFX8:   $vgpr2 = COPY [[COPY7]]
  ; GFX8:   $vgpr3 = COPY [[COPY8]]
  ; GFX8:   $vgpr4 = COPY [[COPY9]]
  ; GFX8:   $vgpr5 = COPY [[COPY10]]
  ; GFX8:   $vgpr6 = COPY [[COPY11]]
  ; GFX8:   $vgpr7 = COPY [[COPY12]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  %soffset = add i32 %soffset.base, 4064
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
2173
2174; Make sure the maximum offset isn't exceeded when splitting this
2175define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; The checks show GFX6/7 materializing the full 4068 into soffset (split-load
  ; immediates 0 and 16), while GFX8 keeps only 4 in soffset and folds 4064/4080
  ; into the instruction offsets.
2176  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2177  ; GFX6: bb.1 (%ir-block.0):
2178  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2179  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2180  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2181  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2182  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2183  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2184  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2185  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
2186  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2187  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2188  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2189  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2190  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2191  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2192  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2193  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2194  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2195  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2196  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2197  ; GFX6:   $vgpr0 = COPY [[COPY5]]
2198  ; GFX6:   $vgpr1 = COPY [[COPY6]]
2199  ; GFX6:   $vgpr2 = COPY [[COPY7]]
2200  ; GFX6:   $vgpr3 = COPY [[COPY8]]
2201  ; GFX6:   $vgpr4 = COPY [[COPY9]]
2202  ; GFX6:   $vgpr5 = COPY [[COPY10]]
2203  ; GFX6:   $vgpr6 = COPY [[COPY11]]
2204  ; GFX6:   $vgpr7 = COPY [[COPY12]]
2205  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2206  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2207  ; GFX7: bb.1 (%ir-block.0):
2208  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2209  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2210  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2211  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2212  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2213  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2214  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2215  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
2216  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2217  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2218  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2219  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2220  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2221  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2222  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2223  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2224  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2225  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2226  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2227  ; GFX7:   $vgpr0 = COPY [[COPY5]]
2228  ; GFX7:   $vgpr1 = COPY [[COPY6]]
2229  ; GFX7:   $vgpr2 = COPY [[COPY7]]
2230  ; GFX7:   $vgpr3 = COPY [[COPY8]]
2231  ; GFX7:   $vgpr4 = COPY [[COPY9]]
2232  ; GFX7:   $vgpr5 = COPY [[COPY10]]
2233  ; GFX7:   $vgpr6 = COPY [[COPY11]]
2234  ; GFX7:   $vgpr7 = COPY [[COPY12]]
2235  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2236  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2237  ; GFX8: bb.1 (%ir-block.0):
2238  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2239  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2240  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2241  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2242  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2243  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2244  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2245  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2246  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2247  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2248  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2249  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2250  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2251  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2252  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2253  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2254  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2255  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2256  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2257  ; GFX8:   $vgpr0 = COPY [[COPY5]]
2258  ; GFX8:   $vgpr1 = COPY [[COPY6]]
2259  ; GFX8:   $vgpr2 = COPY [[COPY7]]
2260  ; GFX8:   $vgpr3 = COPY [[COPY8]]
2261  ; GFX8:   $vgpr4 = COPY [[COPY9]]
2262  ; GFX8:   $vgpr5 = COPY [[COPY10]]
2263  ; GFX8:   $vgpr6 = COPY [[COPY11]]
2264  ; GFX8:   $vgpr7 = COPY [[COPY12]]
2265  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2266  %soffset = add i32 %soffset.base, 4068
2267  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2268  ret <8 x float> %val
2269}
2270
2271define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; The checks show all three targets keeping soffset at 0 and folding the
  ; constant 4032 into the four split-load immediates (4032/4048/4064/4080),
  ; i.e. the whole add is absorbed into the instruction offsets.
2272  ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2273  ; GFX6: bb.1 (%ir-block.0):
2274  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2275  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2276  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2277  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2278  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2279  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2280  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2281  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2282  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2283  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2284  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2285  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2286  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2287  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2288  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2289  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2290  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2291  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2292  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2293  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2294  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2295  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2296  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2297  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2298  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2299  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2300  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2301  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2302  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2303  ; GFX6:   $vgpr0 = COPY [[COPY5]]
2304  ; GFX6:   $vgpr1 = COPY [[COPY6]]
2305  ; GFX6:   $vgpr2 = COPY [[COPY7]]
2306  ; GFX6:   $vgpr3 = COPY [[COPY8]]
2307  ; GFX6:   $vgpr4 = COPY [[COPY9]]
2308  ; GFX6:   $vgpr5 = COPY [[COPY10]]
2309  ; GFX6:   $vgpr6 = COPY [[COPY11]]
2310  ; GFX6:   $vgpr7 = COPY [[COPY12]]
2311  ; GFX6:   $vgpr8 = COPY [[COPY13]]
2312  ; GFX6:   $vgpr9 = COPY [[COPY14]]
2313  ; GFX6:   $vgpr10 = COPY [[COPY15]]
2314  ; GFX6:   $vgpr11 = COPY [[COPY16]]
2315  ; GFX6:   $vgpr12 = COPY [[COPY17]]
2316  ; GFX6:   $vgpr13 = COPY [[COPY18]]
2317  ; GFX6:   $vgpr14 = COPY [[COPY19]]
2318  ; GFX6:   $vgpr15 = COPY [[COPY20]]
2319  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2320  ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2321  ; GFX7: bb.1 (%ir-block.0):
2322  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2323  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2324  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2325  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2326  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2327  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2328  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2329  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2330  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2331  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2332  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2333  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2334  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2335  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2336  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2337  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2338  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2339  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2340  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2341  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2342  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2343  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2344  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2345  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2346  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2347  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2348  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2349  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2350  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2351  ; GFX7:   $vgpr0 = COPY [[COPY5]]
2352  ; GFX7:   $vgpr1 = COPY [[COPY6]]
2353  ; GFX7:   $vgpr2 = COPY [[COPY7]]
2354  ; GFX7:   $vgpr3 = COPY [[COPY8]]
2355  ; GFX7:   $vgpr4 = COPY [[COPY9]]
2356  ; GFX7:   $vgpr5 = COPY [[COPY10]]
2357  ; GFX7:   $vgpr6 = COPY [[COPY11]]
2358  ; GFX7:   $vgpr7 = COPY [[COPY12]]
2359  ; GFX7:   $vgpr8 = COPY [[COPY13]]
2360  ; GFX7:   $vgpr9 = COPY [[COPY14]]
2361  ; GFX7:   $vgpr10 = COPY [[COPY15]]
2362  ; GFX7:   $vgpr11 = COPY [[COPY16]]
2363  ; GFX7:   $vgpr12 = COPY [[COPY17]]
2364  ; GFX7:   $vgpr13 = COPY [[COPY18]]
2365  ; GFX7:   $vgpr14 = COPY [[COPY19]]
2366  ; GFX7:   $vgpr15 = COPY [[COPY20]]
2367  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2368  ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2369  ; GFX8: bb.1 (%ir-block.0):
2370  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2371  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2372  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2373  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2374  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2375  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2376  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2377  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2378  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2379  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2380  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2381  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2382  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2383  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2384  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2385  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2386  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2387  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2388  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2389  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2390  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2391  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2392  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2393  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2394  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2395  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2396  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2397  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2398  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2399  ; GFX8:   $vgpr0 = COPY [[COPY5]]
2400  ; GFX8:   $vgpr1 = COPY [[COPY6]]
2401  ; GFX8:   $vgpr2 = COPY [[COPY7]]
2402  ; GFX8:   $vgpr3 = COPY [[COPY8]]
2403  ; GFX8:   $vgpr4 = COPY [[COPY9]]
2404  ; GFX8:   $vgpr5 = COPY [[COPY10]]
2405  ; GFX8:   $vgpr6 = COPY [[COPY11]]
2406  ; GFX8:   $vgpr7 = COPY [[COPY12]]
2407  ; GFX8:   $vgpr8 = COPY [[COPY13]]
2408  ; GFX8:   $vgpr9 = COPY [[COPY14]]
2409  ; GFX8:   $vgpr10 = COPY [[COPY15]]
2410  ; GFX8:   $vgpr11 = COPY [[COPY16]]
2411  ; GFX8:   $vgpr12 = COPY [[COPY17]]
2412  ; GFX8:   $vgpr13 = COPY [[COPY18]]
2413  ; GFX8:   $vgpr14 = COPY [[COPY19]]
2414  ; GFX8:   $vgpr15 = COPY [[COPY20]]
2415  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2416  %soffset = add i32 %soffset.base, 4032
2417  %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2418  ret <16 x float> %val
2419}
2420
2421define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; The checks show GFX6/7 materializing the full 4036 into soffset (split-load
  ; immediates 0/16/32/48), while GFX8 keeps only 4 in soffset and folds
  ; 4032/4048/4064/4080 into the instruction offsets.
2422  ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2423  ; GFX6: bb.1 (%ir-block.0):
2424  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2425  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2426  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2427  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2428  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2429  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2430  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2431  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2432  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2433  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2434  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2435  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2436  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2437  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2438  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2439  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2440  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2441  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2442  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2443  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2444  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2445  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2446  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2447  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2448  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2449  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2450  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2451  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2452  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2453  ; GFX6:   $vgpr0 = COPY [[COPY5]]
2454  ; GFX6:   $vgpr1 = COPY [[COPY6]]
2455  ; GFX6:   $vgpr2 = COPY [[COPY7]]
2456  ; GFX6:   $vgpr3 = COPY [[COPY8]]
2457  ; GFX6:   $vgpr4 = COPY [[COPY9]]
2458  ; GFX6:   $vgpr5 = COPY [[COPY10]]
2459  ; GFX6:   $vgpr6 = COPY [[COPY11]]
2460  ; GFX6:   $vgpr7 = COPY [[COPY12]]
2461  ; GFX6:   $vgpr8 = COPY [[COPY13]]
2462  ; GFX6:   $vgpr9 = COPY [[COPY14]]
2463  ; GFX6:   $vgpr10 = COPY [[COPY15]]
2464  ; GFX6:   $vgpr11 = COPY [[COPY16]]
2465  ; GFX6:   $vgpr12 = COPY [[COPY17]]
2466  ; GFX6:   $vgpr13 = COPY [[COPY18]]
2467  ; GFX6:   $vgpr14 = COPY [[COPY19]]
2468  ; GFX6:   $vgpr15 = COPY [[COPY20]]
2469  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2470  ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2471  ; GFX7: bb.1 (%ir-block.0):
2472  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2473  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2474  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2475  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2476  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2477  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2478  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2479  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2480  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2481  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2482  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2483  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2484  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2485  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2486  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2487  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2488  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2489  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2490  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2491  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2492  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2493  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2494  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2495  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2496  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2497  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2498  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2499  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2500  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2501  ; GFX7:   $vgpr0 = COPY [[COPY5]]
2502  ; GFX7:   $vgpr1 = COPY [[COPY6]]
2503  ; GFX7:   $vgpr2 = COPY [[COPY7]]
2504  ; GFX7:   $vgpr3 = COPY [[COPY8]]
2505  ; GFX7:   $vgpr4 = COPY [[COPY9]]
2506  ; GFX7:   $vgpr5 = COPY [[COPY10]]
2507  ; GFX7:   $vgpr6 = COPY [[COPY11]]
2508  ; GFX7:   $vgpr7 = COPY [[COPY12]]
2509  ; GFX7:   $vgpr8 = COPY [[COPY13]]
2510  ; GFX7:   $vgpr9 = COPY [[COPY14]]
2511  ; GFX7:   $vgpr10 = COPY [[COPY15]]
2512  ; GFX7:   $vgpr11 = COPY [[COPY16]]
2513  ; GFX7:   $vgpr12 = COPY [[COPY17]]
2514  ; GFX7:   $vgpr13 = COPY [[COPY18]]
2515  ; GFX7:   $vgpr14 = COPY [[COPY19]]
2516  ; GFX7:   $vgpr15 = COPY [[COPY20]]
2517  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2518  ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2519  ; GFX8: bb.1 (%ir-block.0):
2520  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2521  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2522  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2523  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2524  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2525  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2526  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2527  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2528  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2529  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2530  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2531  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2532  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2533  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2534  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2535  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2536  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2537  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2538  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2539  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2540  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2541  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2542  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2543  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2544  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2545  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2546  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2547  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2548  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2549  ; GFX8:   $vgpr0 = COPY [[COPY5]]
2550  ; GFX8:   $vgpr1 = COPY [[COPY6]]
2551  ; GFX8:   $vgpr2 = COPY [[COPY7]]
2552  ; GFX8:   $vgpr3 = COPY [[COPY8]]
2553  ; GFX8:   $vgpr4 = COPY [[COPY9]]
2554  ; GFX8:   $vgpr5 = COPY [[COPY10]]
2555  ; GFX8:   $vgpr6 = COPY [[COPY11]]
2556  ; GFX8:   $vgpr7 = COPY [[COPY12]]
2557  ; GFX8:   $vgpr8 = COPY [[COPY13]]
2558  ; GFX8:   $vgpr9 = COPY [[COPY14]]
2559  ; GFX8:   $vgpr10 = COPY [[COPY15]]
2560  ; GFX8:   $vgpr11 = COPY [[COPY16]]
2561  ; GFX8:   $vgpr12 = COPY [[COPY17]]
2562  ; GFX8:   $vgpr13 = COPY [[COPY18]]
2563  ; GFX8:   $vgpr14 = COPY [[COPY19]]
2564  ; GFX8:   $vgpr15 = COPY [[COPY20]]
2565  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2566  %soffset = add i32 %soffset.base, 4036
2567  %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2568  ret <16 x float> %val
2569}
2570
2571; Waterfall loop due to resource being VGPR
2572define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
2573  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2574  ; GFX6: bb.1 (%ir-block.0):
2575  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2576  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2577  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2578  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2579  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2580  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2581  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2582  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2583  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2584  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2585  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2586  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2587  ; GFX6: bb.2:
2588  ; GFX6:   successors: %bb.3, %bb.2
2589  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2590  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2591  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2592  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2593  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2594  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2595  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2596  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2597  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2598  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2599  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2600  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2601  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2602  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2603  ; GFX6: bb.3:
2604  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2605  ; GFX6: bb.4:
2606  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2607  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2608  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2609  ; GFX7: bb.1 (%ir-block.0):
2610  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2611  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2612  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2613  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2614  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2615  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2616  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2617  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2618  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2619  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2620  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2621  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2622  ; GFX7: bb.2:
2623  ; GFX7:   successors: %bb.3, %bb.2
2624  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2625  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2626  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2627  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2628  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2629  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2630  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2631  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2632  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2633  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2634  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2635  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2636  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2637  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2638  ; GFX7: bb.3:
2639  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2640  ; GFX7: bb.4:
2641  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2642  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2643  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2644  ; GFX8: bb.1 (%ir-block.0):
2645  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2646  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2647  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2648  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2649  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2650  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2651  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2652  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2653  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2654  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2655  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2656  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2657  ; GFX8: bb.2:
2658  ; GFX8:   successors: %bb.3, %bb.2
2659  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2660  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2661  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2662  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2663  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2664  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2665  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2666  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2667  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2668  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2669  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2670  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2671  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2672  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2673  ; GFX8: bb.3:
2674  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2675  ; GFX8: bb.4:
2676  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2677  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2678  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2679  ret float %val
2680}
2681
2682; Use the offset inside the waterfall loop
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; NOTE(review): CHECK lines are autogenerated (see file header); regenerate
  ; with utils/update_mir_test_checks.py instead of hand-editing them.
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; The +4092 is folded into the MUBUF immediate offset of the
  ; BUFFER_LOAD_DWORD_OFFSET inside the waterfall loop, with the base
  ; soffset ([[COPY4]]) used directly as the SOffset operand -- no
  ; separate add instruction is emitted.
  %soffset = add i32 %soffset.base, 4092
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
2787
2788; Scalar offset exceeds MUBUF limit, keep add out of the loop
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; NOTE(review): CHECK lines are autogenerated (see file header); regenerate
  ; with utils/update_mir_test_checks.py instead of hand-editing them.
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX8:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; 4096 does not fit the MUBUF immediate offset (4092 did in the previous
  ; test, 4096 does not), so an S_ADD_I32 is emitted before the waterfall
  ; loop and its result is copied to a VGPR ([[COPY5]]) used as the OFFEN
  ; voffset operand, keeping the add out of the loop body.
  %soffset = add i32 %soffset.base, 4096
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
2905
2906; Waterfall loop, but constant offset
2907define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) {
2908  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
2909  ; GFX6: bb.1 (%ir-block.0):
2910  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
2911  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2912  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2913  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2914  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2915  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2916  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2917  ; GFX6:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2918  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2919  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2920  ; GFX6: bb.2:
2921  ; GFX6:   successors: %bb.3, %bb.2
2922  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
2923  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
2924  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2925  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
2926  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
2927  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
2928  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2929  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
2930  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2931  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2932  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1)
2933  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2934  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2935  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2936  ; GFX6: bb.3:
2937  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2938  ; GFX6: bb.4:
2939  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2940  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2941  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
2942  ; GFX7: bb.1 (%ir-block.0):
2943  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
2944  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2945  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2946  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2947  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2948  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2949  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2950  ; GFX7:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2951  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2952  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2953  ; GFX7: bb.2:
2954  ; GFX7:   successors: %bb.3, %bb.2
2955  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
2956  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
2957  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2958  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
2959  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
2960  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
2961  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2962  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
2963  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2964  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2965  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1)
2966  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2967  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2968  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2969  ; GFX7: bb.3:
2970  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2971  ; GFX7: bb.4:
2972  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2973  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2974  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
2975  ; GFX8: bb.1 (%ir-block.0):
2976  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
2977  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2978  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2979  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2980  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2981  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2982  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2983  ; GFX8:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2984  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2985  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2986  ; GFX8: bb.2:
2987  ; GFX8:   successors: %bb.3, %bb.2
2988  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
2989  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
2990  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2991  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
2992  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
2993  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
2994  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2995  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
2996  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2997  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2998  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1)
2999  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3000  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3001  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3002  ; GFX8: bb.3:
3003  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3004  ; GFX8: bb.4:
3005  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3006  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
3007  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
3008  ret float %val
3009}
3010
3011; Waterfall loop, but constant offset
3012define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) {
3013  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3014  ; GFX6: bb.1 (%ir-block.0):
3015  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3016  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3017  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3018  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3019  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3020  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3021  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3022  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
3023  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3024  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3025  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3026  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3027  ; GFX6: bb.2:
3028  ; GFX6:   successors: %bb.3, %bb.2
3029  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3030  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3031  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3032  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3033  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3034  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3035  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3036  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3037  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3038  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3039  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
3040  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3041  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3042  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3043  ; GFX6: bb.3:
3044  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3045  ; GFX6: bb.4:
3046  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3047  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
3048  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3049  ; GFX7: bb.1 (%ir-block.0):
3050  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3051  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3052  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3053  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3054  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3055  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3056  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3057  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
3058  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3059  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3060  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3061  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3062  ; GFX7: bb.2:
3063  ; GFX7:   successors: %bb.3, %bb.2
3064  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3065  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3066  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3067  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3068  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3069  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3070  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3071  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3072  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3073  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3074  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
3075  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3076  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3077  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3078  ; GFX7: bb.3:
3079  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3080  ; GFX7: bb.4:
3081  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3082  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
3083  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3084  ; GFX8: bb.1 (%ir-block.0):
3085  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3086  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3087  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3088  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3089  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3090  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3091  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
3092  ; GFX8:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3093  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3094  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3095  ; GFX8: bb.2:
3096  ; GFX8:   successors: %bb.3, %bb.2
3097  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
3098  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
3099  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3100  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
3101  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3102  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3103  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3104  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
3105  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3106  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3107  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4096)
3108  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3109  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3110  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3111  ; GFX8: bb.3:
3112  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3113  ; GFX8: bb.4:
3114  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3115  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
3116  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
3117  ret float %val
3118}
3119
3120; Need a waterfall loop, but the offset is scalar.
3121; Make sure the base offset is added to each split load.
; NOTE(review): the CHECK lines below are autogenerated
; (update_mir_test_checks.py) — regenerate rather than hand-edit.
;
; The IR adds the uniform %soffset.base to the constant 4064, but the checks
; show no ADD is selected: the scalar base stays in the soffset operand
; ([[COPY4]]) and the constant is folded into the immediate offset of each
; half of the split 32-byte load (4064 for the first BUFFER_LOAD_DWORDX4 and
; 4080 for the second).  The divergent resource still requires the bb.2
; waterfall loop; both halves are issued inside it and recombined into a
; vreg_256 in bb.4.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY7]]
  ; GFX6:   $vgpr1 = COPY [[COPY8]]
  ; GFX6:   $vgpr2 = COPY [[COPY9]]
  ; GFX6:   $vgpr3 = COPY [[COPY10]]
  ; GFX6:   $vgpr4 = COPY [[COPY11]]
  ; GFX6:   $vgpr5 = COPY [[COPY12]]
  ; GFX6:   $vgpr6 = COPY [[COPY13]]
  ; GFX6:   $vgpr7 = COPY [[COPY14]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY7]]
  ; GFX7:   $vgpr1 = COPY [[COPY8]]
  ; GFX7:   $vgpr2 = COPY [[COPY9]]
  ; GFX7:   $vgpr3 = COPY [[COPY10]]
  ; GFX7:   $vgpr4 = COPY [[COPY11]]
  ; GFX7:   $vgpr5 = COPY [[COPY12]]
  ; GFX7:   $vgpr6 = COPY [[COPY13]]
  ; GFX7:   $vgpr7 = COPY [[COPY14]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY7]]
  ; GFX8:   $vgpr1 = COPY [[COPY8]]
  ; GFX8:   $vgpr2 = COPY [[COPY9]]
  ; GFX8:   $vgpr3 = COPY [[COPY10]]
  ; GFX8:   $vgpr4 = COPY [[COPY11]]
  ; GFX8:   $vgpr5 = COPY [[COPY12]]
  ; GFX8:   $vgpr6 = COPY [[COPY13]]
  ; GFX8:   $vgpr7 = COPY [[COPY14]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  %soffset = add i32 %soffset.base, 4064
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3277
3278; Need a waterfall loop, but the offset is scalar.
; Make sure the maximum offset isn't exceeded when splitting this load.
3280define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3281  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3282  ; GFX6: bb.1 (%ir-block.0):
3283  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3284  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3285  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3286  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3287  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3288  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3289  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3290  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3291  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
3292  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3293  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3294  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3295  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3296  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3297  ; GFX6: bb.2:
3298  ; GFX6:   successors: %bb.3, %bb.2
3299  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3300  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3301  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3302  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
3303  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
3304  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
3305  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3306  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
3307  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3308  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3309  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3310  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3311  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3312  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3313  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3314  ; GFX6: bb.3:
3315  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3316  ; GFX6: bb.4:
3317  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3318  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3319  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3320  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3321  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3322  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3323  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3324  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3325  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3326  ; GFX6:   $vgpr0 = COPY [[COPY8]]
3327  ; GFX6:   $vgpr1 = COPY [[COPY9]]
3328  ; GFX6:   $vgpr2 = COPY [[COPY10]]
3329  ; GFX6:   $vgpr3 = COPY [[COPY11]]
3330  ; GFX6:   $vgpr4 = COPY [[COPY12]]
3331  ; GFX6:   $vgpr5 = COPY [[COPY13]]
3332  ; GFX6:   $vgpr6 = COPY [[COPY14]]
3333  ; GFX6:   $vgpr7 = COPY [[COPY15]]
3334  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3335  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3336  ; GFX7: bb.1 (%ir-block.0):
3337  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3338  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3339  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3340  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3341  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3342  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3343  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3344  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3345  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
3346  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3347  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3348  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3349  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3350  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3351  ; GFX7: bb.2:
3352  ; GFX7:   successors: %bb.3, %bb.2
3353  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3354  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3355  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3356  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
3357  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
3358  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
3359  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3360  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
3361  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3362  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3363  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3364  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3365  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3366  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3367  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3368  ; GFX7: bb.3:
3369  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3370  ; GFX7: bb.4:
3371  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3372  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3373  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3374  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3375  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3376  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3377  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3378  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3379  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3380  ; GFX7:   $vgpr0 = COPY [[COPY8]]
3381  ; GFX7:   $vgpr1 = COPY [[COPY9]]
3382  ; GFX7:   $vgpr2 = COPY [[COPY10]]
3383  ; GFX7:   $vgpr3 = COPY [[COPY11]]
3384  ; GFX7:   $vgpr4 = COPY [[COPY12]]
3385  ; GFX7:   $vgpr5 = COPY [[COPY13]]
3386  ; GFX7:   $vgpr6 = COPY [[COPY14]]
3387  ; GFX7:   $vgpr7 = COPY [[COPY15]]
3388  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3389  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3390  ; GFX8: bb.1 (%ir-block.0):
3391  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3392  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3393  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3394  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3395  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3396  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3397  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3398  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3399  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
3400  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3401  ; GFX8:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3402  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3403  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3404  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3405  ; GFX8: bb.2:
3406  ; GFX8:   successors: %bb.3, %bb.2
3407  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3408  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3409  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3410  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
3411  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
3412  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
3413  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3414  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
3415  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3416  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3417  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3418  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3419  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3420  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3421  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3422  ; GFX8: bb.3:
3423  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3424  ; GFX8: bb.4:
3425  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3426  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3427  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3428  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3429  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3430  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3431  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3432  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3433  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3434  ; GFX8:   $vgpr0 = COPY [[COPY8]]
3435  ; GFX8:   $vgpr1 = COPY [[COPY9]]
3436  ; GFX8:   $vgpr2 = COPY [[COPY10]]
3437  ; GFX8:   $vgpr3 = COPY [[COPY11]]
3438  ; GFX8:   $vgpr4 = COPY [[COPY12]]
3439  ; GFX8:   $vgpr5 = COPY [[COPY13]]
3440  ; GFX8:   $vgpr6 = COPY [[COPY14]]
3441  ; GFX8:   $vgpr7 = COPY [[COPY15]]
3442  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3443  %soffset = add i32 %soffset.base, 4068
3444  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3445  ret <8 x float> %val
3446}
3447
; The resource descriptor lives in VGPRs, so the checks below pin down a
; waterfall loop (bb.2: V_READFIRSTLANE_B32 of each rsrc half, V_CMP_EQ_U64 +
; S_AND_SAVEEXEC_B64, looping via S_CBRANCH_EXECNZ) that uniformizes the
; descriptor before the buffer loads. The uniform +4096 soffset is not folded
; as an immediate here: it is materialized with S_MOV_B32/S_ADD_I32, copied to
; a VGPR, and passed as the voffset operand of two BUFFER_LOAD_DWORDX4_OFFEN
; (offsets 0 and 16) that together produce the <8 x float> result.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY8]]
  ; GFX6:   $vgpr1 = COPY [[COPY9]]
  ; GFX6:   $vgpr2 = COPY [[COPY10]]
  ; GFX6:   $vgpr3 = COPY [[COPY11]]
  ; GFX6:   $vgpr4 = COPY [[COPY12]]
  ; GFX6:   $vgpr5 = COPY [[COPY13]]
  ; GFX6:   $vgpr6 = COPY [[COPY14]]
  ; GFX6:   $vgpr7 = COPY [[COPY15]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY8]]
  ; GFX7:   $vgpr1 = COPY [[COPY9]]
  ; GFX7:   $vgpr2 = COPY [[COPY10]]
  ; GFX7:   $vgpr3 = COPY [[COPY11]]
  ; GFX7:   $vgpr4 = COPY [[COPY12]]
  ; GFX7:   $vgpr5 = COPY [[COPY13]]
  ; GFX7:   $vgpr6 = COPY [[COPY14]]
  ; GFX7:   $vgpr7 = COPY [[COPY15]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX8:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY8]]
  ; GFX8:   $vgpr1 = COPY [[COPY9]]
  ; GFX8:   $vgpr2 = COPY [[COPY10]]
  ; GFX8:   $vgpr3 = COPY [[COPY11]]
  ; GFX8:   $vgpr4 = COPY [[COPY12]]
  ; GFX8:   $vgpr5 = COPY [[COPY13]]
  ; GFX8:   $vgpr6 = COPY [[COPY14]]
  ; GFX8:   $vgpr7 = COPY [[COPY15]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; IR input: uniform base + 4096, divergent rsrc in VGPRs.
  %soffset = add i32 %soffset.base, 4096
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3615
3616define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
3617  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
3618  ; GFX6: bb.1 (%ir-block.0):
3619  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3620  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3621  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3622  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3623  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3624  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3625  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3626  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
3627  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3628  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3629  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3630  ; GFX6: bb.2:
3631  ; GFX6:   successors: %bb.3, %bb.2
3632  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3633  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3634  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3635  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3636  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3637  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3638  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3639  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3640  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3641  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3642  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3643  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3644  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3645  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3646  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3647  ; GFX6: bb.3:
3648  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3649  ; GFX6: bb.4:
3650  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3651  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3652  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3653  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3654  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3655  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3656  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3657  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3658  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3659  ; GFX6:   $vgpr0 = COPY [[COPY7]]
3660  ; GFX6:   $vgpr1 = COPY [[COPY8]]
3661  ; GFX6:   $vgpr2 = COPY [[COPY9]]
3662  ; GFX6:   $vgpr3 = COPY [[COPY10]]
3663  ; GFX6:   $vgpr4 = COPY [[COPY11]]
3664  ; GFX6:   $vgpr5 = COPY [[COPY12]]
3665  ; GFX6:   $vgpr6 = COPY [[COPY13]]
3666  ; GFX6:   $vgpr7 = COPY [[COPY14]]
3667  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3668  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
3669  ; GFX7: bb.1 (%ir-block.0):
3670  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3671  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3672  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3673  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3674  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3675  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3676  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3677  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
3678  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3679  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3680  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3681  ; GFX7: bb.2:
3682  ; GFX7:   successors: %bb.3, %bb.2
3683  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3684  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3685  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3686  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3687  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3688  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3689  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3690  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3691  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3692  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3693  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3694  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3695  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3696  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3697  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3698  ; GFX7: bb.3:
3699  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3700  ; GFX7: bb.4:
3701  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3702  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3703  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3704  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3705  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3706  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3707  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3708  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3709  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3710  ; GFX7:   $vgpr0 = COPY [[COPY7]]
3711  ; GFX7:   $vgpr1 = COPY [[COPY8]]
3712  ; GFX7:   $vgpr2 = COPY [[COPY9]]
3713  ; GFX7:   $vgpr3 = COPY [[COPY10]]
3714  ; GFX7:   $vgpr4 = COPY [[COPY11]]
3715  ; GFX7:   $vgpr5 = COPY [[COPY12]]
3716  ; GFX7:   $vgpr6 = COPY [[COPY13]]
3717  ; GFX7:   $vgpr7 = COPY [[COPY14]]
3718  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3719  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
3720  ; GFX8: bb.1 (%ir-block.0):
3721  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3722  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3723  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3724  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3725  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3726  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3727  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3728  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
3729  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3730  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3731  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3732  ; GFX8: bb.2:
3733  ; GFX8:   successors: %bb.3, %bb.2
3734  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3735  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3736  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3737  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3738  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3739  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3740  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3741  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3742  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3743  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3744  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3745  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3746  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3747  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3748  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3749  ; GFX8: bb.3:
3750  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3751  ; GFX8: bb.4:
3752  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3753  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3754  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3755  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3756  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3757  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3758  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3759  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3760  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3761  ; GFX8:   $vgpr0 = COPY [[COPY7]]
3762  ; GFX8:   $vgpr1 = COPY [[COPY8]]
3763  ; GFX8:   $vgpr2 = COPY [[COPY9]]
3764  ; GFX8:   $vgpr3 = COPY [[COPY10]]
3765  ; GFX8:   $vgpr4 = COPY [[COPY11]]
3766  ; GFX8:   $vgpr5 = COPY [[COPY12]]
3767  ; GFX8:   $vgpr6 = COPY [[COPY13]]
3768  ; GFX8:   $vgpr7 = COPY [[COPY14]]
3769  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3770  %soffset = add i32 %offset.base, 5000
3771  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3772  ret <8 x float> %val
3773}
3774
; Non-uniform (VGPR) rsrc with (add %offset.base, 4076): selection must emit a
; waterfall loop (bb.2) that readfirstlanes the rsrc into SGPRs, compares it
; lane-wise, and masks exec until all active lanes are processed.
; Per the checks below: GFX6/GFX7 fold the whole add constant into the SGPR
; soffset operand (S_MOV_B32 4076) and use 0/16 immediate offsets on the two
; BUFFER_LOAD_DWORDX4_OFFEN halves, while GFX8 materializes only 12 in soffset
; and uses 4064/4080 immediate offsets instead.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY7]]
  ; GFX6:   $vgpr1 = COPY [[COPY8]]
  ; GFX6:   $vgpr2 = COPY [[COPY9]]
  ; GFX6:   $vgpr3 = COPY [[COPY10]]
  ; GFX6:   $vgpr4 = COPY [[COPY11]]
  ; GFX6:   $vgpr5 = COPY [[COPY12]]
  ; GFX6:   $vgpr6 = COPY [[COPY13]]
  ; GFX6:   $vgpr7 = COPY [[COPY14]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY7]]
  ; GFX7:   $vgpr1 = COPY [[COPY8]]
  ; GFX7:   $vgpr2 = COPY [[COPY9]]
  ; GFX7:   $vgpr3 = COPY [[COPY10]]
  ; GFX7:   $vgpr4 = COPY [[COPY11]]
  ; GFX7:   $vgpr5 = COPY [[COPY12]]
  ; GFX7:   $vgpr6 = COPY [[COPY13]]
  ; GFX7:   $vgpr7 = COPY [[COPY14]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY7]]
  ; GFX8:   $vgpr1 = COPY [[COPY8]]
  ; GFX8:   $vgpr2 = COPY [[COPY9]]
  ; GFX8:   $vgpr3 = COPY [[COPY10]]
  ; GFX8:   $vgpr4 = COPY [[COPY11]]
  ; GFX8:   $vgpr5 = COPY [[COPY12]]
  ; GFX8:   $vgpr6 = COPY [[COPY13]]
  ; GFX8:   $vgpr7 = COPY [[COPY14]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  %soffset = add i32 %offset.base, 4076
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3933
3934define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) {
3935  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
3936  ; GFX6: bb.1 (%ir-block.0):
3937  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3938  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3939  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3940  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3941  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3942  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3943  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3944  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
3945  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3946  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3947  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3948  ; GFX6: bb.2:
3949  ; GFX6:   successors: %bb.3, %bb.2
3950  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3951  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3952  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3953  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3954  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3955  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3956  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3957  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3958  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3959  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3960  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3961  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3962  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3963  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3964  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3965  ; GFX6: bb.3:
3966  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3967  ; GFX6: bb.4:
3968  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3969  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3970  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3971  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3972  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3973  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3974  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3975  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3976  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3977  ; GFX6:   $vgpr0 = COPY [[COPY7]]
3978  ; GFX6:   $vgpr1 = COPY [[COPY8]]
3979  ; GFX6:   $vgpr2 = COPY [[COPY9]]
3980  ; GFX6:   $vgpr3 = COPY [[COPY10]]
3981  ; GFX6:   $vgpr4 = COPY [[COPY11]]
3982  ; GFX6:   $vgpr5 = COPY [[COPY12]]
3983  ; GFX6:   $vgpr6 = COPY [[COPY13]]
3984  ; GFX6:   $vgpr7 = COPY [[COPY14]]
3985  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3986  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
3987  ; GFX7: bb.1 (%ir-block.0):
3988  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3989  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3990  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3991  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3992  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3993  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3994  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3995  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
3996  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3997  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3998  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3999  ; GFX7: bb.2:
4000  ; GFX7:   successors: %bb.3, %bb.2
4001  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4002  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4003  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4004  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
4005  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
4006  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
4007  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4008  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
4009  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4010  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4011  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4012  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4013  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4014  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4015  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4016  ; GFX7: bb.3:
4017  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4018  ; GFX7: bb.4:
4019  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4020  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4021  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4022  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4023  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4024  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4025  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4026  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4027  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4028  ; GFX7:   $vgpr0 = COPY [[COPY7]]
4029  ; GFX7:   $vgpr1 = COPY [[COPY8]]
4030  ; GFX7:   $vgpr2 = COPY [[COPY9]]
4031  ; GFX7:   $vgpr3 = COPY [[COPY10]]
4032  ; GFX7:   $vgpr4 = COPY [[COPY11]]
4033  ; GFX7:   $vgpr5 = COPY [[COPY12]]
4034  ; GFX7:   $vgpr6 = COPY [[COPY13]]
4035  ; GFX7:   $vgpr7 = COPY [[COPY14]]
4036  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4037  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4038  ; GFX8: bb.1 (%ir-block.0):
4039  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4040  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4041  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4042  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4043  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4044  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4045  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4046  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
4047  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4048  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4049  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
4050  ; GFX8: bb.2:
4051  ; GFX8:   successors: %bb.3, %bb.2
4052  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4053  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4054  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4055  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
4056  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
4057  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
4058  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4059  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
4060  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4061  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4062  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4063  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4064  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4065  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4066  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4067  ; GFX8: bb.3:
4068  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4069  ; GFX8: bb.4:
4070  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4071  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4072  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4073  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4074  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4075  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4076  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4077  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4078  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4079  ; GFX8:   $vgpr0 = COPY [[COPY7]]
4080  ; GFX8:   $vgpr1 = COPY [[COPY8]]
4081  ; GFX8:   $vgpr2 = COPY [[COPY9]]
4082  ; GFX8:   $vgpr3 = COPY [[COPY10]]
4083  ; GFX8:   $vgpr4 = COPY [[COPY11]]
4084  ; GFX8:   $vgpr5 = COPY [[COPY12]]
4085  ; GFX8:   $vgpr6 = COPY [[COPY13]]
4086  ; GFX8:   $vgpr7 = COPY [[COPY14]]
4087  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4088  %soffset = add i32 %offset.base, 4080
4089  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4090  ret <8 x float> %val
4091}
4092
4093define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) {
4094  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4095  ; GFX6: bb.1 (%ir-block.0):
4096  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4097  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4098  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4099  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4100  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4101  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4102  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4103  ; GFX6:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4104  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4105  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
4106  ; GFX6: bb.2:
4107  ; GFX6:   successors: %bb.3, %bb.2
4108  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
4109  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
4110  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4111  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
4112  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4113  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4114  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4115  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
4116  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4117  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4118  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
4119  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
4120  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4121  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4122  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4123  ; GFX6: bb.3:
4124  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4125  ; GFX6: bb.4:
4126  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4127  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4128  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4129  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4130  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4131  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4132  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4133  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4134  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4135  ; GFX6:   $vgpr0 = COPY [[COPY6]]
4136  ; GFX6:   $vgpr1 = COPY [[COPY7]]
4137  ; GFX6:   $vgpr2 = COPY [[COPY8]]
4138  ; GFX6:   $vgpr3 = COPY [[COPY9]]
4139  ; GFX6:   $vgpr4 = COPY [[COPY10]]
4140  ; GFX6:   $vgpr5 = COPY [[COPY11]]
4141  ; GFX6:   $vgpr6 = COPY [[COPY12]]
4142  ; GFX6:   $vgpr7 = COPY [[COPY13]]
4143  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4144  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4145  ; GFX7: bb.1 (%ir-block.0):
4146  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4147  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4148  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4149  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4150  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4151  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4152  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4153  ; GFX7:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4154  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4155  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
4156  ; GFX7: bb.2:
4157  ; GFX7:   successors: %bb.3, %bb.2
4158  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
4159  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
4160  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4161  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
4162  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4163  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4164  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4165  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
4166  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4167  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4168  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
4169  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
4170  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4171  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4172  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4173  ; GFX7: bb.3:
4174  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4175  ; GFX7: bb.4:
4176  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4177  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4178  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4179  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4180  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4181  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4182  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4183  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4184  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4185  ; GFX7:   $vgpr0 = COPY [[COPY6]]
4186  ; GFX7:   $vgpr1 = COPY [[COPY7]]
4187  ; GFX7:   $vgpr2 = COPY [[COPY8]]
4188  ; GFX7:   $vgpr3 = COPY [[COPY9]]
4189  ; GFX7:   $vgpr4 = COPY [[COPY10]]
4190  ; GFX7:   $vgpr5 = COPY [[COPY11]]
4191  ; GFX7:   $vgpr6 = COPY [[COPY12]]
4192  ; GFX7:   $vgpr7 = COPY [[COPY13]]
4193  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4194  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4195  ; GFX8: bb.1 (%ir-block.0):
4196  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4197  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4198  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4199  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4200  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4201  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4202  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4203  ; GFX8:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4204  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4205  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
4206  ; GFX8: bb.2:
4207  ; GFX8:   successors: %bb.3, %bb.2
4208  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
4209  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
4210  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4211  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
4212  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4213  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4214  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4215  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
4216  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4217  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4218  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
4219  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
4220  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4221  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4222  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4223  ; GFX8: bb.3:
4224  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4225  ; GFX8: bb.4:
4226  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4227  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4228  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4229  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4230  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4231  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4232  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4233  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4234  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4235  ; GFX8:   $vgpr0 = COPY [[COPY6]]
4236  ; GFX8:   $vgpr1 = COPY [[COPY7]]
4237  ; GFX8:   $vgpr2 = COPY [[COPY8]]
4238  ; GFX8:   $vgpr3 = COPY [[COPY9]]
4239  ; GFX8:   $vgpr4 = COPY [[COPY10]]
4240  ; GFX8:   $vgpr5 = COPY [[COPY11]]
4241  ; GFX8:   $vgpr6 = COPY [[COPY12]]
4242  ; GFX8:   $vgpr7 = COPY [[COPY13]]
4243  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4244  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
4245  ret <8 x float> %val
4246}
4247
; Offset is (vgpr + sgpr): selection folds the add away entirely, placing the
; VGPR in the voffset operand and the SGPR in the soffset operand of
; BUFFER_LOAD_DWORD_OFFEN on all three targets.
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset = add i32 %offset.v, %offset.s
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4292
; Same as s_buffer_load_f32_offset_add_vgpr_sgpr but with the add operands
; commuted (sgpr + vgpr); the fold into voffset/soffset is expected to be
; identical.
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset = add i32 %offset.s, %offset.v
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4337
; Offset is (vgpr + sgpr) + 1024: the register-register add is materialized
; as a V_ADD_I32_e64 (SGPR copied to a VGPR first), while the +1024 folds
; into the BUFFER_LOAD_DWORD_OFFEN immediate offset with soffset = 0.
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset.base = add i32 %offset.v, %offset.s
  %offset = add i32 %offset.base, 1024
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4392
; Same as s_buffer_load_f32_offset_add_vgpr_sgpr_imm but with the first add's
; operands commuted (sgpr + vgpr); only the V_ADD_I32_e64 operand order
; differs, and the +1024 still folds into the immediate offset.
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset.base = add i32 %offset.s, %offset.v
  %offset = add i32 %offset.base, 1024
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4447
; TODO: Ideally this would be reassociated to fold.
; Offset is (sgpr + imm) + vgpr. The 1024 immediate is currently materialized
; with S_MOV_B32 and added to the SGPR term via S_ADD_I32 (feeding soffset),
; while the BUFFER_LOAD_DWORD_OFFEN keeps an immediate offset of 0. Folding
; 1024 into the instruction's offset field would save the add.
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; The SGPR-plus-immediate part of the offset is folded into soffset; the
  ; 1024 itself is not folded into the MUBUF immediate offset field.
  %offset.base = add i32 %offset.s, 1024
  %offset = add i32 %offset.base, %offset.v
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4500
; TODO: Ideally this would be reassociated to fold.
; Offset is (vgpr + imm) + sgpr. The 1024 immediate is currently copied into a
; VGPR and added to the VGPR term with V_ADD (feeding voffset), the SGPR term
; is used directly as soffset, and the BUFFER_LOAD_DWORD_OFFEN keeps an
; immediate offset of 0. Folding 1024 into the offset field would save the
; VALU add.
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX6:   %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX7:   %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX8:   %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; The inner add is VGPR + immediate, so the immediate ends up in the VALU
  ; add feeding voffset rather than in the MUBUF immediate offset field.
  %offset.base = add i32 %offset.v, 1024
  %offset = add i32 %offset.base, %offset.s
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4555
; Intrinsic declarations for the overloads exercised above. Signature is
; always (<4 x i32> rsrc, i32 offset, i32 immarg cachepolicy).

; Integer-vector overloads.
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)

; Float-vector overloads.
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)
declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)

; Wide scalar-integer overloads.
declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)

; 16-bit element vector overloads.
declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)

; 64-bit element vector overloads.
declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)

; Pointer-vector overloads (global address space).
declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg)
declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg)