; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s

; FIXME: Merge with regbankselect, which mostly overlaps when all types supported.

; Natural mapping
define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret i32 %val
}

define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_i32_glc
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_glc
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_glc
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
  ret i32 %val
}

define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v2i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX7-LABEL: name: s_buffer_load_v2i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ; GFX8-LABEL: name: s_buffer_load_v2i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <2 x i32> %val
}

define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v3i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX6:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
  ; GFX6:   [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
  ; GFX6:   [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
  ; GFX6:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
  ; GFX6:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
  ; GFX6:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX6:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  ; GFX7-LABEL: name: s_buffer_load_v3i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX7:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
  ; GFX7:   [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
  ; GFX7:   [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
  ; GFX7:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
  ; GFX7:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
  ; GFX7:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX7:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  ; GFX8-LABEL: name: s_buffer_load_v3i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
  ; GFX8:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
  ; GFX8:   [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
  ; GFX8:   [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
  ; GFX8:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
  ; GFX8:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
  ; GFX8:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX8:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
  %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <3 x i32> %val
}

define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX6-LABEL: name: s_buffer_load_v8i32
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX6:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX6:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX6:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX6:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX6:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX6:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX6:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX6:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX6:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX6:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX6:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX6:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX6:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX6:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8i32
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX7:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX7:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX7:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX7:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX7:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX7:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX7:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX7:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX7:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX7:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX7:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX7:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX7:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX7:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8i32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
  ; GFX8:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
  ; GFX8:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
  ; GFX8:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
  ; GFX8:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
  ; GFX8:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
  ; GFX8:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
  ; GFX8:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
  ; GFX8:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
  ; GFX8:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX8:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
  ; GFX8:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX8:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
  ; GFX8:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
  ; GFX8:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
  %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x i32> %val
}

385define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
386  ; GFX6-LABEL: name: s_buffer_load_v16i32
387  ; GFX6: bb.1 (%ir-block.0):
388  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
389  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
390  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
391  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
392  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
393  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
394  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
395  ; GFX6:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
396  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
397  ; GFX6:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
398  ; GFX6:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
399  ; GFX6:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
400  ; GFX6:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
401  ; GFX6:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
402  ; GFX6:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
403  ; GFX6:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
404  ; GFX6:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
405  ; GFX6:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
406  ; GFX6:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
407  ; GFX6:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
408  ; GFX6:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
409  ; GFX6:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
410  ; GFX6:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
411  ; GFX6:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
412  ; GFX6:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
413  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
414  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
415  ; GFX6:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
416  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
417  ; GFX6:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
418  ; GFX6:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
419  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
420  ; GFX6:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
421  ; GFX6:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
422  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
423  ; GFX6:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
424  ; GFX6:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
425  ; GFX6:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
426  ; GFX6:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
427  ; GFX6:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
428  ; GFX6:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
429  ; GFX6:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
430  ; GFX6:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
431  ; GFX6:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
432  ; GFX6:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
433  ; GFX6:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
434  ; GFX6:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
435  ; GFX6:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
436  ; GFX6:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
437  ; GFX6:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
438  ; GFX6:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
439  ; GFX6:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
440  ; GFX6:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
441  ; GFX6:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
442  ; GFX6:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
443  ; GFX6:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
444  ; GFX6:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
445  ; GFX6:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
446  ; GFX6:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
447  ; GFX6:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
448  ; GFX6:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
449  ; GFX6:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
450  ; GFX6:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
451  ; GFX6:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
452  ; GFX6:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
453  ; GFX6:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
454  ; GFX6:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
455  ; GFX6:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
456  ; GFX6:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
457  ; GFX6:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
458  ; GFX6:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
459  ; GFX6:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
460  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
461  ; GFX7-LABEL: name: s_buffer_load_v16i32
462  ; GFX7: bb.1 (%ir-block.0):
463  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
464  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
465  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
466  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
467  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
468  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
469  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
470  ; GFX7:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
471  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
472  ; GFX7:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
473  ; GFX7:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
474  ; GFX7:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
475  ; GFX7:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
476  ; GFX7:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
477  ; GFX7:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
478  ; GFX7:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
479  ; GFX7:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
480  ; GFX7:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
481  ; GFX7:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
482  ; GFX7:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
483  ; GFX7:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
484  ; GFX7:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
485  ; GFX7:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
486  ; GFX7:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
487  ; GFX7:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
488  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
489  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
490  ; GFX7:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
491  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
492  ; GFX7:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
493  ; GFX7:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
494  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
495  ; GFX7:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
496  ; GFX7:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
497  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
498  ; GFX7:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
499  ; GFX7:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
500  ; GFX7:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
501  ; GFX7:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
502  ; GFX7:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
503  ; GFX7:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
504  ; GFX7:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
505  ; GFX7:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
506  ; GFX7:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
507  ; GFX7:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
508  ; GFX7:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
509  ; GFX7:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
510  ; GFX7:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
511  ; GFX7:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
512  ; GFX7:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
513  ; GFX7:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
514  ; GFX7:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
515  ; GFX7:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
516  ; GFX7:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
517  ; GFX7:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
518  ; GFX7:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
519  ; GFX7:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
520  ; GFX7:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
521  ; GFX7:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
522  ; GFX7:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
523  ; GFX7:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
524  ; GFX7:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
525  ; GFX7:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
526  ; GFX7:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
527  ; GFX7:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
528  ; GFX7:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
529  ; GFX7:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
530  ; GFX7:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
531  ; GFX7:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
532  ; GFX7:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
533  ; GFX7:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
534  ; GFX7:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
535  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
536  ; GFX8-LABEL: name: s_buffer_load_v16i32
537  ; GFX8: bb.1 (%ir-block.0):
538  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
539  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
540  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
541  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
542  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
543  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
544  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
545  ; GFX8:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
546  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
547  ; GFX8:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
548  ; GFX8:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
549  ; GFX8:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
550  ; GFX8:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
551  ; GFX8:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
552  ; GFX8:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
553  ; GFX8:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
554  ; GFX8:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
555  ; GFX8:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
556  ; GFX8:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
557  ; GFX8:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
558  ; GFX8:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
559  ; GFX8:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
560  ; GFX8:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
561  ; GFX8:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
562  ; GFX8:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
563  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
564  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
565  ; GFX8:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
566  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
567  ; GFX8:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
568  ; GFX8:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
569  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
570  ; GFX8:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
571  ; GFX8:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
572  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
573  ; GFX8:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
574  ; GFX8:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
575  ; GFX8:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
576  ; GFX8:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
577  ; GFX8:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
578  ; GFX8:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
579  ; GFX8:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
580  ; GFX8:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
581  ; GFX8:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
582  ; GFX8:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
583  ; GFX8:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
584  ; GFX8:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
585  ; GFX8:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
586  ; GFX8:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
587  ; GFX8:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
588  ; GFX8:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
589  ; GFX8:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
590  ; GFX8:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
591  ; GFX8:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
592  ; GFX8:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
593  ; GFX8:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
594  ; GFX8:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
595  ; GFX8:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
596  ; GFX8:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
597  ; GFX8:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
598  ; GFX8:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
599  ; GFX8:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
600  ; GFX8:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
601  ; GFX8:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
602  ; GFX8:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
603  ; GFX8:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
604  ; GFX8:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
605  ; GFX8:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
606  ; GFX8:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
607  ; GFX8:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
608  ; GFX8:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
609  ; GFX8:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
610  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
611  %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
612  ret <16 x i32> %val
613}
614
; Constant offset 1 (not dword-aligned): GFX6/GFX7 cannot fold it as an
; immediate and instead materialize it with S_MOV_B32 and select the
; S_BUFFER_LOAD_DWORD_SGPR form; GFX8 folds it directly into
; S_BUFFER_LOAD_DWORD_IMM. NOTE(review): consistent with SI/CI taking the
; SMRD immediate in dword units vs. byte units on VI -- confirm vs. ISA docs.
615define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
616  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1
617  ; GFX6: bb.1 (%ir-block.0):
618  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
619  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
620  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
621  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
622  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
623  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
624  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
625  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
626  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
627  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
628  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
629  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
630  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1
631  ; GFX7: bb.1 (%ir-block.0):
632  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
633  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
634  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
635  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
636  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
637  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
638  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
639  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
640  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
641  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
642  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
643  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
644  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1
645  ; GFX8: bb.1 (%ir-block.0):
646  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
647  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
648  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
649  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
650  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
651  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
652  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0, 0 :: (dereferenceable invariant load 4)
653  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
654  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
655  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
656  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
657  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
658  ret i32 %val
659}
660
; Offset 4 with cachepolicy 1 (glc): all targets fold the offset as an
; immediate. GFX6/GFX7 encode 1 (checks show operand 4/4 = 1) while GFX8
; encodes the byte value 4. The selected instructions carry a 1 in the
; operand following the offset, matching the glc request -- NOTE(review):
; presumed to be the glc bit; confirm against the instruction definition.
661define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
662  ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
663  ; GFX6: bb.1 (%ir-block.0):
664  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
665  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
666  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
667  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
668  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
669  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
670  ; GFX6:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
671  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
672  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
673  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
674  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
675  ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
676  ; GFX7: bb.1 (%ir-block.0):
677  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
678  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
679  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
680  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
681  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
682  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
683  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
684  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
685  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
686  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
687  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
688  ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
689  ; GFX8: bb.1 (%ir-block.0):
690  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
691  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
692  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
693  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
694  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
695  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
696  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1, 0 :: (dereferenceable invariant load 4)
697  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
698  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
699  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
700  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
701  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
702  ret i32 %val
703}
704
; Offset 255 (not a multiple of 4): GFX6/GFX7 cannot use the immediate form
; and select S_MOV_B32 255 + S_BUFFER_LOAD_DWORD_SGPR; GFX8 encodes 255
; directly in S_BUFFER_LOAD_DWORD_IMM.
705define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
706  ; GFX6-LABEL: name: s_buffer_load_i32_offset_255
707  ; GFX6: bb.1 (%ir-block.0):
708  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
709  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
710  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
711  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
712  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
713  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
714  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
715  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
716  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
717  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
718  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
719  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
720  ; GFX7-LABEL: name: s_buffer_load_i32_offset_255
721  ; GFX7: bb.1 (%ir-block.0):
722  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
723  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
724  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
725  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
726  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
727  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
728  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
729  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
730  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
731  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
732  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
733  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
734  ; GFX8-LABEL: name: s_buffer_load_i32_offset_255
735  ; GFX8: bb.1 (%ir-block.0):
736  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
737  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
738  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
739  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
740  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
741  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
742  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
743  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
744  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
745  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
746  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
747  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
748  ret i32 %val
749}
750
; Offset 256 (dword-aligned): all targets use the immediate form.
; GFX6/GFX7 encode 64 (= 256 / 4), GFX8 encodes the byte offset 256 --
; evidence that SI/CI immediates are in dword units and VI in bytes.
751define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
752  ; GFX6-LABEL: name: s_buffer_load_i32_offset_256
753  ; GFX6: bb.1 (%ir-block.0):
754  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
755  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
756  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
757  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
758  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
759  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
760  ; GFX6:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
761  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
762  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
763  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
764  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
765  ; GFX7-LABEL: name: s_buffer_load_i32_offset_256
766  ; GFX7: bb.1 (%ir-block.0):
767  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
768  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
769  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
770  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
771  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
772  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
773  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
774  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
775  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
776  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
777  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
778  ; GFX8-LABEL: name: s_buffer_load_i32_offset_256
779  ; GFX8: bb.1 (%ir-block.0):
780  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
781  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
782  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
783  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
784  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
785  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
786  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
787  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
788  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
789  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
790  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
791  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
792  ret i32 %val
793}
794
; Offset 1020 (dword-aligned): immediate form everywhere. GFX6/GFX7 encode
; 255 (= 1020 / 4), GFX8 encodes 1020. NOTE(review): 255 appears to be the
; largest SI/CI dword immediate given the next test (1023) falls back to
; the SGPR form -- confirm the SMRD offset field width against ISA docs.
795define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
796  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
797  ; GFX6: bb.1 (%ir-block.0):
798  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
799  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
800  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
801  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
802  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
803  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
804  ; GFX6:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
805  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
806  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
807  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
808  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
809  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
810  ; GFX7: bb.1 (%ir-block.0):
811  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
812  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
813  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
814  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
815  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
816  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
817  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
818  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
819  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
820  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
821  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
822  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
823  ; GFX8: bb.1 (%ir-block.0):
824  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
825  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
826  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
827  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
828  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
829  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
830  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0, 0 :: (dereferenceable invariant load 4)
831  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
832  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
833  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
834  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
835  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
836  ret i32 %val
837}
838
; Offset 1023 (not a multiple of 4): GFX6/GFX7 fall back to
; S_MOV_B32 1023 + S_BUFFER_LOAD_DWORD_SGPR; GFX8 can still encode 1023
; directly as an immediate byte offset.
839define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
840  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
841  ; GFX6: bb.1 (%ir-block.0):
842  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
843  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
844  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
845  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
846  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
847  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
848  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
849  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
850  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
851  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
852  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
853  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
854  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
855  ; GFX7: bb.1 (%ir-block.0):
856  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
857  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
858  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
859  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
860  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
861  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
862  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
863  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
864  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
865  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
866  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
867  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
868  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
869  ; GFX8: bb.1 (%ir-block.0):
870  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
871  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
872  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
873  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
874  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
875  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
876  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0, 0 :: (dereferenceable invariant load 4)
877  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
878  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
879  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
880  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
881  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
882  ret i32 %val
883}
884
; Offset 1024: GFX7 selects S_BUFFER_LOAD_DWORD_IMM_ci with the dword-scaled
; immediate 256 (1024 >> 2). GFX8 takes 1024 directly as a byte immediate.
; GFX6 still materializes the offset in an SGPR.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
  ret i32 %val
}
929
; Offset 1025 (not dword-aligned): only GFX8 encodes it as an immediate;
; GFX6 and GFX7 fall back to S_MOV_B32 + the SGPR-offset form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
  ret i32 %val
}
975
; Offset -1: none of the three targets encodes this offset as an immediate
; here, so all of them materialize -1 with S_MOV_B32 and use the SGPR form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
  ret i32 %load
}
1022
; Offset -4 is dword-aligned, so GFX7 encodes it as the unsigned dword-scaled
; immediate 1073741823 ((u32)-4 >> 2) in S_BUFFER_LOAD_DWORD_IMM_ci. GFX6 and
; GFX8 materialize the offset in an SGPR instead.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
  ret i32 %load
}
1068
; Offset -8: as with -4, GFX7 encodes the dword-scaled unsigned immediate
; 1073741822 ((u32)-8 >> 2) in S_BUFFER_LOAD_DWORD_IMM_ci, while GFX6 and
; GFX8 fall back to the SGPR-offset form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
  ret i32 %load
}
1114
; Offset 0x80000000 (bit 31 set, i.e. -2147483648 as i32): dword-aligned, so
; GFX7 encodes the dword-scaled immediate 536870912 (0x80000000 >> 2) in
; S_BUFFER_LOAD_DWORD_IMM_ci; GFX6 and GFX8 use the SGPR-offset form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
  ret i32 %load
}
1160
; Offset 0x40000000 with cachepolicy = 1 (glc): note the selected loads carry
; a 1 instead of 0 in the cache-policy operand. GFX7 encodes the dword-scaled
; immediate 268435456 (0x40000000 >> 2); GFX6 and GFX8 use the SGPR form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1)
  ret i32 %load
}
1206
; Offset 0x20000000 (bit 29 set): GFX7 encodes the dword-scaled immediate
; 134217728 (0x20000000 >> 2) in S_BUFFER_LOAD_DWORD_IMM_ci; GFX6 and GFX8
; use the SGPR-offset form.
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
  ret i32 %load
}
1252
; Offset 0x200000 (bit 21 set): larger than GFX8's encodable byte-immediate
; range (GFX8 used _IMM for 1025 above but falls back to SGPR here), so GFX8
; joins GFX6 in the SGPR-offset form; GFX7 encodes the dword-scaled immediate
; 524288 (0x200000 >> 2).
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
  ret i32 %load
}
1298
; Immediate offset 1048576 (only bit 20 set). GFX6 and GFX8 materialize the
; offset with S_MOV_B32 and select the SGPR-offset form; GFX7 folds it into
; S_BUFFER_LOAD_DWORD_IMM_ci as 262144 (= 1048576 >> 2 — the _IMM_ci
; immediate appears to be dword-scaled; confirm against the SMRD encoding).
1299define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
1300  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20
1301  ; GFX6: bb.1 (%ir-block.0):
1302  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1303  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1304  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1305  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1306  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1307  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1308  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
1309  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1310  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1311  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1312  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1313  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
1314  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20
1315  ; GFX7: bb.1 (%ir-block.0):
1316  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1317  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1318  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1319  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1320  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1321  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1322  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0, 0 :: (dereferenceable invariant load 4)
1323  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1324  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1325  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1326  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
1327  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20
1328  ; GFX8: bb.1 (%ir-block.0):
1329  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1330  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1331  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1332  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1333  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1334  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1335  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
1336  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1337  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1338  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1339  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1340  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
1341  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
1342  ret i32 %load
1343}
1344
; Negative offset -1048576. GFX6 and GFX8 materialize it with S_MOV_B32 and
; use the SGPR-offset form; GFX7 folds it into S_BUFFER_LOAD_DWORD_IMM_ci as
; 1073479680 (= (u32)-1048576 >> 2, i.e. the zero-extended value dword-scaled —
; confirm scaling against the SMRD encoding).
1345define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) {
1346  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1347  ; GFX6: bb.1 (%ir-block.0):
1348  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1349  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1350  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1351  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1352  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1353  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1354  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
1355  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1356  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1357  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1358  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1359  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
1360  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1361  ; GFX7: bb.1 (%ir-block.0):
1362  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1363  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1364  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1365  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1366  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1367  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1368  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0, 0 :: (dereferenceable invariant load 4)
1369  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1370  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1371  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1372  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
1373  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1374  ; GFX8: bb.1 (%ir-block.0):
1375  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1376  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1377  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1378  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1379  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1380  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1381  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
1382  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1383  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1384  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1385  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1386  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
1387  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32  -1048576, i32 0)
1388  ret i32 %load
1389}
1390
; Immediate offset 524288 (only bit 19 set). Unlike the bit-20 case above
; (applied independently), this value is folded into an immediate form on
; GFX8 (S_BUFFER_LOAD_DWORD_IMM, byte offset 524288) and on GFX7
; (S_BUFFER_LOAD_DWORD_IMM_ci, 131072 = 524288 >> 2); GFX6 still needs the
; SGPR-offset form via S_MOV_B32.
1391define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
1392  ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19
1393  ; GFX6: bb.1 (%ir-block.0):
1394  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1395  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1396  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1397  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1398  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1399  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1400  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
1401  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1402  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1403  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1404  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1405  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
1406  ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19
1407  ; GFX7: bb.1 (%ir-block.0):
1408  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1409  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1410  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1411  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1412  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1413  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1414  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0, 0 :: (dereferenceable invariant load 4)
1415  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1416  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1417  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1418  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
1419  ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19
1420  ; GFX8: bb.1 (%ir-block.0):
1421  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1422  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1423  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1424  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1425  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1426  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1427  ; GFX8:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
1428  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
1429  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1430  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1431  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
1432  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
1433  ret i32 %load
1434}
1435
; Negative offset -524288. GFX6 and GFX8 materialize it with S_MOV_B32 and
; select the SGPR-offset form; GFX7 folds it into S_BUFFER_LOAD_DWORD_IMM_ci
; as 1073610752 (= (u32)-524288 >> 2, i.e. the zero-extended value
; dword-scaled — confirm scaling against the SMRD encoding).
1436define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
1437  ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1438  ; GFX6: bb.1 (%ir-block.0):
1439  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1440  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1441  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1442  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1443  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1444  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1445  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
1446  ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1447  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1448  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1449  ; GFX6:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1450  ; GFX6:   SI_RETURN_TO_EPILOG implicit $sgpr0
1451  ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1452  ; GFX7: bb.1 (%ir-block.0):
1453  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1454  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1455  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1456  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1457  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1458  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1459  ; GFX7:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0, 0 :: (dereferenceable invariant load 4)
1460  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1461  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1462  ; GFX7:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1463  ; GFX7:   SI_RETURN_TO_EPILOG implicit $sgpr0
1464  ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1465  ; GFX8: bb.1 (%ir-block.0):
1466  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1467  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1468  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1469  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1470  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1471  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1472  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
1473  ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
1474  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1475  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1476  ; GFX8:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1477  ; GFX8:   SI_RETURN_TO_EPILOG implicit $sgpr0
1478  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
1479  ret i32 %load
1480}
1481
1482; Check cases that need to be converted to MUBUF due to the offset being a VGPR.
; The soffset operand arrives in a VGPR ($vgpr0), so the scalar buffer load
; cannot be used; all three targets select the MUBUF form
; BUFFER_LOAD_DWORD_OFFEN with the VGPR as voffset and a zero SGPR soffset.
1483define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1484  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
1485  ; GFX6: bb.1 (%ir-block.0):
1486  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1487  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1488  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1489  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1490  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1491  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1492  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1493  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1494  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
1495  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1496  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
1497  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
1498  ; GFX7: bb.1 (%ir-block.0):
1499  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1500  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1501  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1502  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1503  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1504  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1505  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1506  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1507  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
1508  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1509  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
1510  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
1511  ; GFX8: bb.1 (%ir-block.0):
1512  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1513  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1514  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1515  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1516  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1517  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1518  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1519  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1520  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
1521  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1522  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
1523  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1524  ret float %val
1525}
1526
; VGPR-offset <2 x float> load: all targets select BUFFER_LOAD_DWORDX2_OFFEN
; (8-byte load, align 4) and split the vreg_64 result into two vgpr_32 copies
; for the $vgpr0/$vgpr1 return registers.
1527define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1528  ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1529  ; GFX6: bb.1 (%ir-block.0):
1530  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1531  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1532  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1533  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1534  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1535  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1536  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1537  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1538  ; GFX6:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
1539  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1540  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1541  ; GFX6:   $vgpr0 = COPY [[COPY5]]
1542  ; GFX6:   $vgpr1 = COPY [[COPY6]]
1543  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1544  ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1545  ; GFX7: bb.1 (%ir-block.0):
1546  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1547  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1548  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1549  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1550  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1551  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1552  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1553  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1554  ; GFX7:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
1555  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1556  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1557  ; GFX7:   $vgpr0 = COPY [[COPY5]]
1558  ; GFX7:   $vgpr1 = COPY [[COPY6]]
1559  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1560  ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1561  ; GFX8: bb.1 (%ir-block.0):
1562  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1563  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1564  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1565  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1566  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1567  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1568  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1569  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1570  ; GFX8:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
1571  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1572  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1573  ; GFX8:   $vgpr0 = COPY [[COPY5]]
1574  ; GFX8:   $vgpr1 = COPY [[COPY6]]
1575  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1576  %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1577  ret <2 x float> %val
1578}
1579
; VGPR-offset <3 x float> load: the 12-byte result is widened to a full
; BUFFER_LOAD_DWORDX4_OFFEN (16-byte load, align 4). The dwordx4 result is
; then padded with IMPLICIT_DEF quarters into a vreg_512 REG_SEQUENCE, from
; which only sub0_sub1_sub2 is extracted for the three return VGPRs — the
; vreg_512 round-trip looks like a legalization artifact rather than a
; requirement; worth confirming if this pattern is intended.
1580define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1581  ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1582  ; GFX6: bb.1 (%ir-block.0):
1583  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1584  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1585  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1586  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1587  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1588  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1589  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1590  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1591  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1592  ; GFX6:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1593  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
1594  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
1595  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
1596  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
1597  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
1598  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
1599  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
1600  ; GFX6:   $vgpr0 = COPY [[COPY8]]
1601  ; GFX6:   $vgpr1 = COPY [[COPY9]]
1602  ; GFX6:   $vgpr2 = COPY [[COPY10]]
1603  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1604  ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1605  ; GFX7: bb.1 (%ir-block.0):
1606  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1607  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1608  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1609  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1610  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1611  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1612  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1613  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1614  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1615  ; GFX7:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1616  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
1617  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
1618  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
1619  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
1620  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
1621  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
1622  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
1623  ; GFX7:   $vgpr0 = COPY [[COPY8]]
1624  ; GFX7:   $vgpr1 = COPY [[COPY9]]
1625  ; GFX7:   $vgpr2 = COPY [[COPY10]]
1626  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1627  ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1628  ; GFX8: bb.1 (%ir-block.0):
1629  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1630  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1631  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1632  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1633  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1634  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1635  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1636  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1637  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1638  ; GFX8:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1639  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
1640  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
1641  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
1642  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
1643  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
1644  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
1645  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
1646  ; GFX8:   $vgpr0 = COPY [[COPY8]]
1647  ; GFX8:   $vgpr1 = COPY [[COPY9]]
1648  ; GFX8:   $vgpr2 = COPY [[COPY10]]
1649  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1650  %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1651  ret <3 x float> %val
1652}
1653
; VGPR-offset <4 x float> load: all targets select a single
; BUFFER_LOAD_DWORDX4_OFFEN (16-byte load, align 4) and split the vreg_128
; result into four vgpr_32 copies for the $vgpr0-$vgpr3 return registers.
1654define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1655  ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1656  ; GFX6: bb.1 (%ir-block.0):
1657  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1658  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1659  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1660  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1661  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1662  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1663  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1664  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1665  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1666  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1667  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1668  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1669  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1670  ; GFX6:   $vgpr0 = COPY [[COPY5]]
1671  ; GFX6:   $vgpr1 = COPY [[COPY6]]
1672  ; GFX6:   $vgpr2 = COPY [[COPY7]]
1673  ; GFX6:   $vgpr3 = COPY [[COPY8]]
1674  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1675  ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1676  ; GFX7: bb.1 (%ir-block.0):
1677  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1678  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1679  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1680  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1681  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1682  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1683  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1684  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1685  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1686  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1687  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1688  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1689  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1690  ; GFX7:   $vgpr0 = COPY [[COPY5]]
1691  ; GFX7:   $vgpr1 = COPY [[COPY6]]
1692  ; GFX7:   $vgpr2 = COPY [[COPY7]]
1693  ; GFX7:   $vgpr3 = COPY [[COPY8]]
1694  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1695  ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1696  ; GFX8: bb.1 (%ir-block.0):
1697  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1698  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1699  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1700  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1701  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1702  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1703  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1704  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1705  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1706  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1707  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1708  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1709  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1710  ; GFX8:   $vgpr0 = COPY [[COPY5]]
1711  ; GFX8:   $vgpr1 = COPY [[COPY6]]
1712  ; GFX8:   $vgpr2 = COPY [[COPY7]]
1713  ; GFX8:   $vgpr3 = COPY [[COPY8]]
1714  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1715  %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1716  ret <4 x float> %val
1717}
1718
1719define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1720  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1721  ; GFX6: bb.1 (%ir-block.0):
1722  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1723  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1724  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1725  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1726  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1727  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1728  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1729  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1730  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1731  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1732  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1733  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1734  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1735  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1736  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1737  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1738  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1739  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1740  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1741  ; GFX6:   $vgpr0 = COPY [[COPY5]]
1742  ; GFX6:   $vgpr1 = COPY [[COPY6]]
1743  ; GFX6:   $vgpr2 = COPY [[COPY7]]
1744  ; GFX6:   $vgpr3 = COPY [[COPY8]]
1745  ; GFX6:   $vgpr4 = COPY [[COPY9]]
1746  ; GFX6:   $vgpr5 = COPY [[COPY10]]
1747  ; GFX6:   $vgpr6 = COPY [[COPY11]]
1748  ; GFX6:   $vgpr7 = COPY [[COPY12]]
1749  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1750  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1751  ; GFX7: bb.1 (%ir-block.0):
1752  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1753  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1754  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1755  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1756  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1757  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1758  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1759  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1760  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1761  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1762  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1763  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1764  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1765  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1766  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1767  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1768  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1769  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1770  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1771  ; GFX7:   $vgpr0 = COPY [[COPY5]]
1772  ; GFX7:   $vgpr1 = COPY [[COPY6]]
1773  ; GFX7:   $vgpr2 = COPY [[COPY7]]
1774  ; GFX7:   $vgpr3 = COPY [[COPY8]]
1775  ; GFX7:   $vgpr4 = COPY [[COPY9]]
1776  ; GFX7:   $vgpr5 = COPY [[COPY10]]
1777  ; GFX7:   $vgpr6 = COPY [[COPY11]]
1778  ; GFX7:   $vgpr7 = COPY [[COPY12]]
1779  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1780  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1781  ; GFX8: bb.1 (%ir-block.0):
1782  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1783  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1784  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1785  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1786  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1787  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1788  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1789  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1790  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1791  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1792  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1793  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1794  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1795  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1796  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1797  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1798  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1799  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1800  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1801  ; GFX8:   $vgpr0 = COPY [[COPY5]]
1802  ; GFX8:   $vgpr1 = COPY [[COPY6]]
1803  ; GFX8:   $vgpr2 = COPY [[COPY7]]
1804  ; GFX8:   $vgpr3 = COPY [[COPY8]]
1805  ; GFX8:   $vgpr4 = COPY [[COPY9]]
1806  ; GFX8:   $vgpr5 = COPY [[COPY10]]
1807  ; GFX8:   $vgpr6 = COPY [[COPY11]]
1808  ; GFX8:   $vgpr7 = COPY [[COPY12]]
1809  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1810  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1811  ret <8 x float> %val
1812}
1813
; 64-byte (<16 x float>) s.buffer.load with a divergent (VGPR) offset.
; Expected lowering: the scalar buffer load cannot be selected with a VGPR
; soffset, so it is split into four BUFFER_LOAD_DWORDX4_OFFEN at immediate
; offsets 0/16/32/48 (soffset register = 0) and recombined into a vreg_512
; that is returned in $vgpr0-$vgpr15.
; NOTE(review): the memory operands on the offset-32/48 loads read
; "load 16 + 16" / "load 16 + 48" while the first two loads carry no offset
; annotation -- these checks were autogenerated and mirror the compiler
; output at generation time; confirm the MMO offsets if regenerating.
1814define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1815  ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1816  ; GFX6: bb.1 (%ir-block.0):
1817  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1818  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1819  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1820  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1821  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1822  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1823  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1824  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1825  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1826  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1827  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
1828  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
1829  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
1830  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1831  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1832  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1833  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1834  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1835  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1836  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1837  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1838  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
1839  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
1840  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
1841  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
1842  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
1843  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
1844  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
1845  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
1846  ; GFX6:   $vgpr0 = COPY [[COPY5]]
1847  ; GFX6:   $vgpr1 = COPY [[COPY6]]
1848  ; GFX6:   $vgpr2 = COPY [[COPY7]]
1849  ; GFX6:   $vgpr3 = COPY [[COPY8]]
1850  ; GFX6:   $vgpr4 = COPY [[COPY9]]
1851  ; GFX6:   $vgpr5 = COPY [[COPY10]]
1852  ; GFX6:   $vgpr6 = COPY [[COPY11]]
1853  ; GFX6:   $vgpr7 = COPY [[COPY12]]
1854  ; GFX6:   $vgpr8 = COPY [[COPY13]]
1855  ; GFX6:   $vgpr9 = COPY [[COPY14]]
1856  ; GFX6:   $vgpr10 = COPY [[COPY15]]
1857  ; GFX6:   $vgpr11 = COPY [[COPY16]]
1858  ; GFX6:   $vgpr12 = COPY [[COPY17]]
1859  ; GFX6:   $vgpr13 = COPY [[COPY18]]
1860  ; GFX6:   $vgpr14 = COPY [[COPY19]]
1861  ; GFX6:   $vgpr15 = COPY [[COPY20]]
1862  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
1863  ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1864  ; GFX7: bb.1 (%ir-block.0):
1865  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1866  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1867  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1868  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1869  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1870  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1871  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1872  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1873  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1874  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1875  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
1876  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
1877  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
1878  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1879  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1880  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1881  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1882  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1883  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1884  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1885  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1886  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
1887  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
1888  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
1889  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
1890  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
1891  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
1892  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
1893  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
1894  ; GFX7:   $vgpr0 = COPY [[COPY5]]
1895  ; GFX7:   $vgpr1 = COPY [[COPY6]]
1896  ; GFX7:   $vgpr2 = COPY [[COPY7]]
1897  ; GFX7:   $vgpr3 = COPY [[COPY8]]
1898  ; GFX7:   $vgpr4 = COPY [[COPY9]]
1899  ; GFX7:   $vgpr5 = COPY [[COPY10]]
1900  ; GFX7:   $vgpr6 = COPY [[COPY11]]
1901  ; GFX7:   $vgpr7 = COPY [[COPY12]]
1902  ; GFX7:   $vgpr8 = COPY [[COPY13]]
1903  ; GFX7:   $vgpr9 = COPY [[COPY14]]
1904  ; GFX7:   $vgpr10 = COPY [[COPY15]]
1905  ; GFX7:   $vgpr11 = COPY [[COPY16]]
1906  ; GFX7:   $vgpr12 = COPY [[COPY17]]
1907  ; GFX7:   $vgpr13 = COPY [[COPY18]]
1908  ; GFX7:   $vgpr14 = COPY [[COPY19]]
1909  ; GFX7:   $vgpr15 = COPY [[COPY20]]
1910  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
1911  ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1912  ; GFX8: bb.1 (%ir-block.0):
1913  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1914  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1915  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1916  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1917  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1918  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1919  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1920  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1921  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1922  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
1923  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
1924  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
1925  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
1926  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1927  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1928  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1929  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1930  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1931  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1932  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1933  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1934  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
1935  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
1936  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
1937  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
1938  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
1939  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
1940  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
1941  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
1942  ; GFX8:   $vgpr0 = COPY [[COPY5]]
1943  ; GFX8:   $vgpr1 = COPY [[COPY6]]
1944  ; GFX8:   $vgpr2 = COPY [[COPY7]]
1945  ; GFX8:   $vgpr3 = COPY [[COPY8]]
1946  ; GFX8:   $vgpr4 = COPY [[COPY9]]
1947  ; GFX8:   $vgpr5 = COPY [[COPY10]]
1948  ; GFX8:   $vgpr6 = COPY [[COPY11]]
1949  ; GFX8:   $vgpr7 = COPY [[COPY12]]
1950  ; GFX8:   $vgpr8 = COPY [[COPY13]]
1951  ; GFX8:   $vgpr9 = COPY [[COPY14]]
1952  ; GFX8:   $vgpr10 = COPY [[COPY15]]
1953  ; GFX8:   $vgpr11 = COPY [[COPY16]]
1954  ; GFX8:   $vgpr12 = COPY [[COPY17]]
1955  ; GFX8:   $vgpr13 = COPY [[COPY18]]
1956  ; GFX8:   $vgpr14 = COPY [[COPY19]]
1957  ; GFX8:   $vgpr15 = COPY [[COPY20]]
1958  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
1959  %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1960  ret <16 x float> %val
1961}
1962
; VGPR offset = %soffset.base + 4092. All three targets fold the added
; constant into the MUBUF immediate offset field (imm = 4092, soffset
; register = 0) -- presumably because 4092 fits the instruction's immediate
; offset range; see the +4096 test below for the non-folding case.
1963define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) {
1964  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
1965  ; GFX6: bb.1 (%ir-block.0):
1966  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1967  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1968  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1969  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1970  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1971  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1972  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1973  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1974  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
1975  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1976  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
1977  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
1978  ; GFX7: bb.1 (%ir-block.0):
1979  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1980  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1981  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1982  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1983  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1984  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1985  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1986  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1987  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
1988  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1989  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
1990  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
1991  ; GFX8: bb.1 (%ir-block.0):
1992  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1993  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1994  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1995  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1996  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1997  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1998  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1999  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2000  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2001  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2002  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2003  %soffset = add i32 %soffset.base, 4092
2004  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2005  ret float %val
2006}
2007
; VGPR offset = %soffset.base + 4095. Still folds into the MUBUF immediate
; offset on all targets (imm = 4095, soffset register = 0) -- per these
; checks, 4095 is the largest constant that folds; +4096 below does not.
2008define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2009  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2010  ; GFX6: bb.1 (%ir-block.0):
2011  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2012  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2013  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2014  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2015  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2016  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2017  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2018  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2019  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2020  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2021  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2022  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2023  ; GFX7: bb.1 (%ir-block.0):
2024  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2025  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2026  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2027  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2028  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2029  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2030  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2031  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2032  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2033  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2034  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2035  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2036  ; GFX8: bb.1 (%ir-block.0):
2037  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2038  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2039  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2040  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2041  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2042  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2043  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2044  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2045  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2046  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2047  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2048  %soffset = add i32 %soffset.base, 4095
2049  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2050  ret float %val
2051}
2052
; VGPR offset = %soffset.base + 4096, which no longer folds into the
; immediate field. GFX6/GFX7 materialize the whole constant in the soffset
; register (S_MOV_B32 4096, imm = 0); GFX8 instead splits it as
; soffset = 1 with imm = 4095. NOTE(review): the GFX8 split presumably
; reflects a target-specific difference in MUBUF offset handling -- confirm
; against the GCN3 ISA documentation if these checks are regenerated.
2053define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2054  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2055  ; GFX6: bb.1 (%ir-block.0):
2056  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2057  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2058  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2059  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2060  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2061  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2062  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2063  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2064  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2065  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2066  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2067  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2068  ; GFX7: bb.1 (%ir-block.0):
2069  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2070  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2071  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2072  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2073  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2074  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2075  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2076  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2077  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2078  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2079  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2080  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2081  ; GFX8: bb.1 (%ir-block.0):
2082  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2083  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2084  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2085  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2086  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2087  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2088  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2089  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
2090  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2091  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2092  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2093  %soffset = add i32 %soffset.base, 4096
2094  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2095  ret float %val
2096}
2097
2098; Make sure the base offset is added to each split load.
2099define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2100  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2101  ; GFX6: bb.1 (%ir-block.0):
2102  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2103  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2104  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2105  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2106  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2107  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2108  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2109  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2110  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2111  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2112  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2113  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2114  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2115  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2116  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2117  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2118  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2119  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2120  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2121  ; GFX6:   $vgpr0 = COPY [[COPY5]]
2122  ; GFX6:   $vgpr1 = COPY [[COPY6]]
2123  ; GFX6:   $vgpr2 = COPY [[COPY7]]
2124  ; GFX6:   $vgpr3 = COPY [[COPY8]]
2125  ; GFX6:   $vgpr4 = COPY [[COPY9]]
2126  ; GFX6:   $vgpr5 = COPY [[COPY10]]
2127  ; GFX6:   $vgpr6 = COPY [[COPY11]]
2128  ; GFX6:   $vgpr7 = COPY [[COPY12]]
2129  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2130  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2131  ; GFX7: bb.1 (%ir-block.0):
2132  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2133  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2134  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2135  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2136  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2137  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2138  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2139  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2140  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2141  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2142  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2143  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2144  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2145  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2146  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2147  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2148  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2149  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2150  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2151  ; GFX7:   $vgpr0 = COPY [[COPY5]]
2152  ; GFX7:   $vgpr1 = COPY [[COPY6]]
2153  ; GFX7:   $vgpr2 = COPY [[COPY7]]
2154  ; GFX7:   $vgpr3 = COPY [[COPY8]]
2155  ; GFX7:   $vgpr4 = COPY [[COPY9]]
2156  ; GFX7:   $vgpr5 = COPY [[COPY10]]
2157  ; GFX7:   $vgpr6 = COPY [[COPY11]]
2158  ; GFX7:   $vgpr7 = COPY [[COPY12]]
2159  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2160  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2161  ; GFX8: bb.1 (%ir-block.0):
2162  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2163  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2164  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2165  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2166  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2167  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2168  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2169  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2170  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2171  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2172  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2173  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2174  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2175  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2176  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2177  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2178  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2179  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2180  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2181  ; GFX8:   $vgpr0 = COPY [[COPY5]]
2182  ; GFX8:   $vgpr1 = COPY [[COPY6]]
2183  ; GFX8:   $vgpr2 = COPY [[COPY7]]
2184  ; GFX8:   $vgpr3 = COPY [[COPY8]]
2185  ; GFX8:   $vgpr4 = COPY [[COPY9]]
2186  ; GFX8:   $vgpr5 = COPY [[COPY10]]
2187  ; GFX8:   $vgpr6 = COPY [[COPY11]]
2188  ; GFX8:   $vgpr7 = COPY [[COPY12]]
2189  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2190  %soffset = add i32 %soffset.base, 4064
2191  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2192  ret <8 x float> %val
2193}
2194
; Make sure the maximum immediate offset isn't exceeded when splitting this load.
; Base offset 4068 is 4064 + 4. In the checks below, GFX6/GFX7 materialize the
; full 4068 into the soffset register (S_MOV_B32 4068) and use small immediate
; offsets (0 and 16) on the two split 16-byte loads, whereas GFX8 keeps only
; the residual 4 in soffset and folds 4064/4080 into the instructions'
; immediate offset fields. (Checks autogenerated; do not hand-edit.)
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY5]]
  ; GFX6:   $vgpr1 = COPY [[COPY6]]
  ; GFX6:   $vgpr2 = COPY [[COPY7]]
  ; GFX6:   $vgpr3 = COPY [[COPY8]]
  ; GFX6:   $vgpr4 = COPY [[COPY9]]
  ; GFX6:   $vgpr5 = COPY [[COPY10]]
  ; GFX6:   $vgpr6 = COPY [[COPY11]]
  ; GFX6:   $vgpr7 = COPY [[COPY12]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY5]]
  ; GFX7:   $vgpr1 = COPY [[COPY6]]
  ; GFX7:   $vgpr2 = COPY [[COPY7]]
  ; GFX7:   $vgpr3 = COPY [[COPY8]]
  ; GFX7:   $vgpr4 = COPY [[COPY9]]
  ; GFX7:   $vgpr5 = COPY [[COPY10]]
  ; GFX7:   $vgpr6 = COPY [[COPY11]]
  ; GFX7:   $vgpr7 = COPY [[COPY12]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY5]]
  ; GFX8:   $vgpr1 = COPY [[COPY6]]
  ; GFX8:   $vgpr2 = COPY [[COPY7]]
  ; GFX8:   $vgpr3 = COPY [[COPY8]]
  ; GFX8:   $vgpr4 = COPY [[COPY9]]
  ; GFX8:   $vgpr5 = COPY [[COPY10]]
  ; GFX8:   $vgpr6 = COPY [[COPY11]]
  ; GFX8:   $vgpr7 = COPY [[COPY12]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  %soffset = add i32 %soffset.base, 4068
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
2291
; A 64-byte (v16f32) load at base offset 4032: per the checks below, all three
; targets split it into four 16-byte BUFFER_LOAD_DWORDX4_OFFENs with immediate
; offsets 4032/4048/4064/4080 and soffset 0 — the largest piece (4080 + 16)
; still fits the immediate offset field, so no soffset materialization of the
; base is needed. (Checks autogenerated; do not hand-edit.)
define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
  ; GFX6:   $vgpr0 = COPY [[COPY5]]
  ; GFX6:   $vgpr1 = COPY [[COPY6]]
  ; GFX6:   $vgpr2 = COPY [[COPY7]]
  ; GFX6:   $vgpr3 = COPY [[COPY8]]
  ; GFX6:   $vgpr4 = COPY [[COPY9]]
  ; GFX6:   $vgpr5 = COPY [[COPY10]]
  ; GFX6:   $vgpr6 = COPY [[COPY11]]
  ; GFX6:   $vgpr7 = COPY [[COPY12]]
  ; GFX6:   $vgpr8 = COPY [[COPY13]]
  ; GFX6:   $vgpr9 = COPY [[COPY14]]
  ; GFX6:   $vgpr10 = COPY [[COPY15]]
  ; GFX6:   $vgpr11 = COPY [[COPY16]]
  ; GFX6:   $vgpr12 = COPY [[COPY17]]
  ; GFX6:   $vgpr13 = COPY [[COPY18]]
  ; GFX6:   $vgpr14 = COPY [[COPY19]]
  ; GFX6:   $vgpr15 = COPY [[COPY20]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
  ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
  ; GFX7:   $vgpr0 = COPY [[COPY5]]
  ; GFX7:   $vgpr1 = COPY [[COPY6]]
  ; GFX7:   $vgpr2 = COPY [[COPY7]]
  ; GFX7:   $vgpr3 = COPY [[COPY8]]
  ; GFX7:   $vgpr4 = COPY [[COPY9]]
  ; GFX7:   $vgpr5 = COPY [[COPY10]]
  ; GFX7:   $vgpr6 = COPY [[COPY11]]
  ; GFX7:   $vgpr7 = COPY [[COPY12]]
  ; GFX7:   $vgpr8 = COPY [[COPY13]]
  ; GFX7:   $vgpr9 = COPY [[COPY14]]
  ; GFX7:   $vgpr10 = COPY [[COPY15]]
  ; GFX7:   $vgpr11 = COPY [[COPY16]]
  ; GFX7:   $vgpr12 = COPY [[COPY17]]
  ; GFX7:   $vgpr13 = COPY [[COPY18]]
  ; GFX7:   $vgpr14 = COPY [[COPY19]]
  ; GFX7:   $vgpr15 = COPY [[COPY20]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
  ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
  ; GFX8:   $vgpr0 = COPY [[COPY5]]
  ; GFX8:   $vgpr1 = COPY [[COPY6]]
  ; GFX8:   $vgpr2 = COPY [[COPY7]]
  ; GFX8:   $vgpr3 = COPY [[COPY8]]
  ; GFX8:   $vgpr4 = COPY [[COPY9]]
  ; GFX8:   $vgpr5 = COPY [[COPY10]]
  ; GFX8:   $vgpr6 = COPY [[COPY11]]
  ; GFX8:   $vgpr7 = COPY [[COPY12]]
  ; GFX8:   $vgpr8 = COPY [[COPY13]]
  ; GFX8:   $vgpr9 = COPY [[COPY14]]
  ; GFX8:   $vgpr10 = COPY [[COPY15]]
  ; GFX8:   $vgpr11 = COPY [[COPY16]]
  ; GFX8:   $vgpr12 = COPY [[COPY17]]
  ; GFX8:   $vgpr13 = COPY [[COPY18]]
  ; GFX8:   $vgpr14 = COPY [[COPY19]]
  ; GFX8:   $vgpr15 = COPY [[COPY20]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
  %soffset = add i32 %soffset.base, 4032
  %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <16 x float> %val
}
2441
2442define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2443  ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2444  ; GFX6: bb.1 (%ir-block.0):
2445  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2446  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2447  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2448  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2449  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2450  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2451  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2452  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2453  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2454  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2455  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2456  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2457  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2458  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2459  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2460  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2461  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2462  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2463  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2464  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2465  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2466  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2467  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2468  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2469  ; GFX6:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2470  ; GFX6:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2471  ; GFX6:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2472  ; GFX6:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2473  ; GFX6:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2474  ; GFX6:   $vgpr0 = COPY [[COPY5]]
2475  ; GFX6:   $vgpr1 = COPY [[COPY6]]
2476  ; GFX6:   $vgpr2 = COPY [[COPY7]]
2477  ; GFX6:   $vgpr3 = COPY [[COPY8]]
2478  ; GFX6:   $vgpr4 = COPY [[COPY9]]
2479  ; GFX6:   $vgpr5 = COPY [[COPY10]]
2480  ; GFX6:   $vgpr6 = COPY [[COPY11]]
2481  ; GFX6:   $vgpr7 = COPY [[COPY12]]
2482  ; GFX6:   $vgpr8 = COPY [[COPY13]]
2483  ; GFX6:   $vgpr9 = COPY [[COPY14]]
2484  ; GFX6:   $vgpr10 = COPY [[COPY15]]
2485  ; GFX6:   $vgpr11 = COPY [[COPY16]]
2486  ; GFX6:   $vgpr12 = COPY [[COPY17]]
2487  ; GFX6:   $vgpr13 = COPY [[COPY18]]
2488  ; GFX6:   $vgpr14 = COPY [[COPY19]]
2489  ; GFX6:   $vgpr15 = COPY [[COPY20]]
2490  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2491  ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2492  ; GFX7: bb.1 (%ir-block.0):
2493  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2494  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2495  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2496  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2497  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2498  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2499  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2500  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2501  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2502  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2503  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2504  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2505  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2506  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2507  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2508  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2509  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2510  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2511  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2512  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2513  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2514  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2515  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2516  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2517  ; GFX7:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2518  ; GFX7:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2519  ; GFX7:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2520  ; GFX7:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2521  ; GFX7:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2522  ; GFX7:   $vgpr0 = COPY [[COPY5]]
2523  ; GFX7:   $vgpr1 = COPY [[COPY6]]
2524  ; GFX7:   $vgpr2 = COPY [[COPY7]]
2525  ; GFX7:   $vgpr3 = COPY [[COPY8]]
2526  ; GFX7:   $vgpr4 = COPY [[COPY9]]
2527  ; GFX7:   $vgpr5 = COPY [[COPY10]]
2528  ; GFX7:   $vgpr6 = COPY [[COPY11]]
2529  ; GFX7:   $vgpr7 = COPY [[COPY12]]
2530  ; GFX7:   $vgpr8 = COPY [[COPY13]]
2531  ; GFX7:   $vgpr9 = COPY [[COPY14]]
2532  ; GFX7:   $vgpr10 = COPY [[COPY15]]
2533  ; GFX7:   $vgpr11 = COPY [[COPY16]]
2534  ; GFX7:   $vgpr12 = COPY [[COPY17]]
2535  ; GFX7:   $vgpr13 = COPY [[COPY18]]
2536  ; GFX7:   $vgpr14 = COPY [[COPY19]]
2537  ; GFX7:   $vgpr15 = COPY [[COPY20]]
2538  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2539  ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2540  ; GFX8: bb.1 (%ir-block.0):
2541  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2542  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2543  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2544  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2545  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2546  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2547  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2548  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2549  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2550  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
2551  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 16, align 4)
2552  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 48, align 4)
2553  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2554  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2555  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2556  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2557  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2558  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2559  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2560  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2561  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2562  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2563  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2564  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2565  ; GFX8:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2566  ; GFX8:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2567  ; GFX8:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2568  ; GFX8:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2569  ; GFX8:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2570  ; GFX8:   $vgpr0 = COPY [[COPY5]]
2571  ; GFX8:   $vgpr1 = COPY [[COPY6]]
2572  ; GFX8:   $vgpr2 = COPY [[COPY7]]
2573  ; GFX8:   $vgpr3 = COPY [[COPY8]]
2574  ; GFX8:   $vgpr4 = COPY [[COPY9]]
2575  ; GFX8:   $vgpr5 = COPY [[COPY10]]
2576  ; GFX8:   $vgpr6 = COPY [[COPY11]]
2577  ; GFX8:   $vgpr7 = COPY [[COPY12]]
2578  ; GFX8:   $vgpr8 = COPY [[COPY13]]
2579  ; GFX8:   $vgpr9 = COPY [[COPY14]]
2580  ; GFX8:   $vgpr10 = COPY [[COPY15]]
2581  ; GFX8:   $vgpr11 = COPY [[COPY16]]
2582  ; GFX8:   $vgpr12 = COPY [[COPY17]]
2583  ; GFX8:   $vgpr13 = COPY [[COPY18]]
2584  ; GFX8:   $vgpr14 = COPY [[COPY19]]
2585  ; GFX8:   $vgpr15 = COPY [[COPY20]]
2586  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2587  %soffset = add i32 %soffset.base, 4036
2588  %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2589  ret <16 x float> %val
2590}
2591
2592; Waterfall loop due to resource being VGPR
; The resource descriptor arrives in VGPRs, so a scalar (SMEM) buffer load
; cannot be used directly.  Instruction selection emits a waterfall loop
; (bb.2): V_READFIRSTLANE_B32 extracts a uniform SGPR copy of each half of
; the descriptor, V_CMP_EQ_U64 finds the lanes whose descriptor matches that
; uniform copy, S_AND_SAVEEXEC_B64 restricts exec to those lanes for the
; BUFFER_LOAD_DWORD_OFFEN, and S_XOR_B64_term + S_CBRANCH_EXECNZ repeat the
; loop until all lanes are serviced.  The original exec mask is saved in
; S_MOV_B64_term before the loop and restored in bb.3.
2593define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
2594  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2595  ; GFX6: bb.1 (%ir-block.0):
2596  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2597  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2598  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2599  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2600  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2601  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2602  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2603  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2604  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2605  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2606  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2607  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2608  ; GFX6: bb.2:
2609  ; GFX6:   successors: %bb.3, %bb.2
2610  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2611  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2612  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2613  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2614  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2615  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2616  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2617  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2618  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2619  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2620  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2621  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2622  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2623  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2624  ; GFX6: bb.3:
2625  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2626  ; GFX6: bb.4:
2627  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2628  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2629  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2630  ; GFX7: bb.1 (%ir-block.0):
2631  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2632  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2633  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2634  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2635  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2636  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2637  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2638  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2639  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2640  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2641  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2642  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2643  ; GFX7: bb.2:
2644  ; GFX7:   successors: %bb.3, %bb.2
2645  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2646  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2647  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2648  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2649  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2650  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2651  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2652  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2653  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2654  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2655  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2656  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2657  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2658  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2659  ; GFX7: bb.3:
2660  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2661  ; GFX7: bb.4:
2662  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2663  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2664  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2665  ; GFX8: bb.1 (%ir-block.0):
2666  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2667  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2668  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2669  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2670  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2671  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2672  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2673  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2674  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2675  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2676  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2677  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2678  ; GFX8: bb.2:
2679  ; GFX8:   successors: %bb.3, %bb.2
2680  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2681  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2682  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2683  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2684  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2685  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2686  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2687  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2688  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2689  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2690  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2691  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2692  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2693  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2694  ; GFX8: bb.3:
2695  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2696  ; GFX8: bb.4:
2697  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2698  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2699  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2700  ret float %val
2701}
2702
2703; Use the offset inside the waterfall loop
; Same waterfall-loop structure as above, but here the IR adds 4092 to the
; SGPR base offset.  4092 fits in the MUBUF instruction's immediate offset
; field, so no separate add instruction is emitted: the loop body's
; BUFFER_LOAD_DWORD_OFFSET carries 4092 as its immediate offset and takes
; the unmodified base soffset ([[COPY4]]) as its SGPR offset operand.
2704define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
2705  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2706  ; GFX6: bb.1 (%ir-block.0):
2707  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2708  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2709  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2710  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2711  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2712  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2713  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2714  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2715  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2716  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2717  ; GFX6: bb.2:
2718  ; GFX6:   successors: %bb.3, %bb.2
2719  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
2720  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
2721  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2722  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
2723  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2724  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2725  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2726  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
2727  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2728  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2729  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2730  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2731  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2732  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2733  ; GFX6: bb.3:
2734  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2735  ; GFX6: bb.4:
2736  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2737  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2738  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2739  ; GFX7: bb.1 (%ir-block.0):
2740  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2741  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2742  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2743  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2744  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2745  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2746  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2747  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2748  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2749  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2750  ; GFX7: bb.2:
2751  ; GFX7:   successors: %bb.3, %bb.2
2752  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
2753  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
2754  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2755  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
2756  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2757  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2758  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2759  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
2760  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2761  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2762  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2763  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2764  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2765  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2766  ; GFX7: bb.3:
2767  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2768  ; GFX7: bb.4:
2769  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2770  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2771  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2772  ; GFX8: bb.1 (%ir-block.0):
2773  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2774  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2775  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2776  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2777  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2778  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2779  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2780  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2781  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2782  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2783  ; GFX8: bb.2:
2784  ; GFX8:   successors: %bb.3, %bb.2
2785  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
2786  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
2787  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2788  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
2789  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2790  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2791  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2792  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
2793  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2794  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2795  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2796  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2797  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2798  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2799  ; GFX8: bb.3:
2800  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2801  ; GFX8: bb.4:
2802  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2803  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2804  %soffset = add i32 %soffset.base, 4092
2805  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2806  ret float %val
2807}
2808
2809; Scalar offset exceeds MUBUF limit, keep add out of the loop
; In contrast to the 4092 case above, 4096 does not fit in the MUBUF
; immediate offset field.  An S_MOV_B32 4096 + S_ADD_I32 compute
; %soffset.base + 4096 once, before entering the waterfall loop; the sum is
; copied to a VGPR ([[COPY5]]) and used as the voffset operand of
; BUFFER_LOAD_DWORD_OFFEN with a 0 immediate offset, so the add is not
; re-executed on each loop iteration.
2810define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
2811  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
2812  ; GFX6: bb.1 (%ir-block.0):
2813  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2814  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2815  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2816  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2817  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2818  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2819  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2820  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2821  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
2822  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
2823  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2824  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2825  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2826  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2827  ; GFX6: bb.2:
2828  ; GFX6:   successors: %bb.3, %bb.2
2829  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2830  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2831  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2832  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2833  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2834  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2835  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2836  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2837  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2838  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2839  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2840  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2841  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2842  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2843  ; GFX6: bb.3:
2844  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2845  ; GFX6: bb.4:
2846  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2847  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
2848  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
2849  ; GFX7: bb.1 (%ir-block.0):
2850  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2851  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2852  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2853  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2854  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2855  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2856  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2857  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2858  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
2859  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
2860  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2861  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2862  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2863  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2864  ; GFX7: bb.2:
2865  ; GFX7:   successors: %bb.3, %bb.2
2866  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2867  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2868  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2869  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2870  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2871  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2872  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2873  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2874  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2875  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2876  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2877  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2878  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2879  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2880  ; GFX7: bb.3:
2881  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2882  ; GFX7: bb.4:
2883  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2884  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
2885  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
2886  ; GFX8: bb.1 (%ir-block.0):
2887  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2888  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2889  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2890  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2891  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2892  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2893  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2894  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2895  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
2896  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
2897  ; GFX8:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2898  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2899  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2900  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
2901  ; GFX8: bb.2:
2902  ; GFX8:   successors: %bb.3, %bb.2
2903  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
2904  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
2905  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
2906  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
2907  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
2908  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
2909  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
2910  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
2911  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
2912  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2913  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
2914  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2915  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2916  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
2917  ; GFX8: bb.3:
2918  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
2919  ; GFX8: bb.4:
2920  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2921  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
2922  %soffset = add i32 %soffset.base, 4096
2923  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2924  ret float %val
2925}
2926
; Waterfall loop, but constant offset
; The constant offset 4095 still fits in the instruction's immediate offset
; field, so all three targets select BUFFER_LOAD_DWORD_OFFSET with imm offset
; 4095 and an soffset of 0; the readfirstlane waterfall loop (bb.2) is only
; needed to legalize the VGPR rsrc into an SGPR quad for the MUBUF load.
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4095, align 1)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
  ret float %val
}
3031
; Waterfall loop, but constant offset
; Unlike the 4095 case, 4096 is not folded into the immediate offset field the
; same way on all targets: GFX6/GFX7 materialize 4096 into a VGPR and select
; BUFFER_LOAD_DWORD_OFFEN with an soffset of 0, while GFX8 keeps the OFFSET
; form by using soffset = 1 plus immediate offset 4095.
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) {
  ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
  ; GFX8:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 + 4096)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
  ret float %val
}
3140
; Need a waterfall loop, but the offset is scalar.
; Make sure the base offset is added to each split load.
; The v8f32 result is split into two BUFFER_LOAD_DWORDX4_OFFSET loads with
; immediate offsets 4064 and 4080, both carrying the scalar %soffset.base in
; the soffset operand; the waterfall loop only legalizes the VGPR rsrc, and
; bb.4 reassembles the two x4 halves into a vreg_256 before returning.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY7]]
  ; GFX6:   $vgpr1 = COPY [[COPY8]]
  ; GFX6:   $vgpr2 = COPY [[COPY9]]
  ; GFX6:   $vgpr3 = COPY [[COPY10]]
  ; GFX6:   $vgpr4 = COPY [[COPY11]]
  ; GFX6:   $vgpr5 = COPY [[COPY12]]
  ; GFX6:   $vgpr6 = COPY [[COPY13]]
  ; GFX6:   $vgpr7 = COPY [[COPY14]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY7]]
  ; GFX7:   $vgpr1 = COPY [[COPY8]]
  ; GFX7:   $vgpr2 = COPY [[COPY9]]
  ; GFX7:   $vgpr3 = COPY [[COPY10]]
  ; GFX7:   $vgpr4 = COPY [[COPY11]]
  ; GFX7:   $vgpr5 = COPY [[COPY12]]
  ; GFX7:   $vgpr6 = COPY [[COPY13]]
  ; GFX7:   $vgpr7 = COPY [[COPY14]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY7]]
  ; GFX8:   $vgpr1 = COPY [[COPY8]]
  ; GFX8:   $vgpr2 = COPY [[COPY9]]
  ; GFX8:   $vgpr3 = COPY [[COPY10]]
  ; GFX8:   $vgpr4 = COPY [[COPY11]]
  ; GFX8:   $vgpr5 = COPY [[COPY12]]
  ; GFX8:   $vgpr6 = COPY [[COPY13]]
  ; GFX8:   $vgpr7 = COPY [[COPY14]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  %soffset = add i32 %soffset.base, 4064
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3298
3299; Need a waterfall loop, but the offset is scalar.
; Make sure the maximum offset isn't exceeded when splitting this load.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; The resource descriptor is in VGPRs, so on all three targets selection
  ; emits a waterfall loop (bb.2) that readfirstlanes the descriptor halves,
  ; compares them against the per-lane values, and only then issues the loads.
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY8]]
  ; GFX6:   $vgpr1 = COPY [[COPY9]]
  ; GFX6:   $vgpr2 = COPY [[COPY10]]
  ; GFX6:   $vgpr3 = COPY [[COPY11]]
  ; GFX6:   $vgpr4 = COPY [[COPY12]]
  ; GFX6:   $vgpr5 = COPY [[COPY13]]
  ; GFX6:   $vgpr6 = COPY [[COPY14]]
  ; GFX6:   $vgpr7 = COPY [[COPY15]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY8]]
  ; GFX7:   $vgpr1 = COPY [[COPY9]]
  ; GFX7:   $vgpr2 = COPY [[COPY10]]
  ; GFX7:   $vgpr3 = COPY [[COPY11]]
  ; GFX7:   $vgpr4 = COPY [[COPY12]]
  ; GFX7:   $vgpr5 = COPY [[COPY13]]
  ; GFX7:   $vgpr6 = COPY [[COPY14]]
  ; GFX7:   $vgpr7 = COPY [[COPY15]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX8:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY8]]
  ; GFX8:   $vgpr1 = COPY [[COPY9]]
  ; GFX8:   $vgpr2 = COPY [[COPY10]]
  ; GFX8:   $vgpr3 = COPY [[COPY11]]
  ; GFX8:   $vgpr4 = COPY [[COPY12]]
  ; GFX8:   $vgpr5 = COPY [[COPY13]]
  ; GFX8:   $vgpr6 = COPY [[COPY14]]
  ; GFX8:   $vgpr7 = COPY [[COPY15]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; The scalar offset (%soffset.base + 4068) is formed with S_ADD_I32, copied
  ; to a VGPR, and used as the voffset of two BUFFER_LOAD_DWORDX4_OFFEN loads
  ; (the second at immediate offset 16); their results are concatenated into a
  ; vreg_256 to produce the <8 x float> return value.
  %soffset = add i32 %soffset.base, 4068
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3468
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
  ; Same structure as the add_4068 case: the descriptor is in VGPRs, so all
  ; three targets use a waterfall loop (bb.2) built from V_READFIRSTLANE_B32 /
  ; V_CMP_EQ_U64 / S_AND_SAVEEXEC_B64 around the two buffer loads.
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX6:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX6:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY8]]
  ; GFX6:   $vgpr1 = COPY [[COPY9]]
  ; GFX6:   $vgpr2 = COPY [[COPY10]]
  ; GFX6:   $vgpr3 = COPY [[COPY11]]
  ; GFX6:   $vgpr4 = COPY [[COPY12]]
  ; GFX6:   $vgpr5 = COPY [[COPY13]]
  ; GFX6:   $vgpr6 = COPY [[COPY14]]
  ; GFX6:   $vgpr7 = COPY [[COPY15]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX7:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX7:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY8]]
  ; GFX7:   $vgpr1 = COPY [[COPY9]]
  ; GFX7:   $vgpr2 = COPY [[COPY10]]
  ; GFX7:   $vgpr3 = COPY [[COPY11]]
  ; GFX7:   $vgpr4 = COPY [[COPY12]]
  ; GFX7:   $vgpr5 = COPY [[COPY13]]
  ; GFX7:   $vgpr6 = COPY [[COPY14]]
  ; GFX7:   $vgpr7 = COPY [[COPY15]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX8:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
  ; GFX8:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY8]]
  ; GFX8:   $vgpr1 = COPY [[COPY9]]
  ; GFX8:   $vgpr2 = COPY [[COPY10]]
  ; GFX8:   $vgpr3 = COPY [[COPY11]]
  ; GFX8:   $vgpr4 = COPY [[COPY12]]
  ; GFX8:   $vgpr5 = COPY [[COPY13]]
  ; GFX8:   $vgpr6 = COPY [[COPY14]]
  ; GFX8:   $vgpr7 = COPY [[COPY15]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; The scalar offset (%soffset.base + 4096) is formed with S_ADD_I32, copied
  ; to a VGPR, and used as the voffset of two BUFFER_LOAD_DWORDX4_OFFEN loads
  ; (the second at immediate offset 16); their results are concatenated into a
  ; vreg_256 to produce the <8 x float> return value.
  %soffset = add i32 %soffset.base, 4096
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3636
3637define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
3638  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
3639  ; GFX6: bb.1 (%ir-block.0):
3640  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3641  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3642  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3643  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3644  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3645  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3646  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3647  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
3648  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3649  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3650  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3651  ; GFX6: bb.2:
3652  ; GFX6:   successors: %bb.3, %bb.2
3653  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3654  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3655  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3656  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3657  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3658  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3659  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3660  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3661  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3662  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3663  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3664  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3665  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3666  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3667  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3668  ; GFX6: bb.3:
3669  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3670  ; GFX6: bb.4:
3671  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3672  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3673  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3674  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3675  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3676  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3677  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3678  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3679  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3680  ; GFX6:   $vgpr0 = COPY [[COPY7]]
3681  ; GFX6:   $vgpr1 = COPY [[COPY8]]
3682  ; GFX6:   $vgpr2 = COPY [[COPY9]]
3683  ; GFX6:   $vgpr3 = COPY [[COPY10]]
3684  ; GFX6:   $vgpr4 = COPY [[COPY11]]
3685  ; GFX6:   $vgpr5 = COPY [[COPY12]]
3686  ; GFX6:   $vgpr6 = COPY [[COPY13]]
3687  ; GFX6:   $vgpr7 = COPY [[COPY14]]
3688  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3689  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
3690  ; GFX7: bb.1 (%ir-block.0):
3691  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3692  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3693  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3694  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3695  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3696  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3697  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3698  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
3699  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3700  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3701  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3702  ; GFX7: bb.2:
3703  ; GFX7:   successors: %bb.3, %bb.2
3704  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3705  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3706  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3707  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3708  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3709  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3710  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3711  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3712  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3713  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3714  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3715  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3716  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3717  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3718  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3719  ; GFX7: bb.3:
3720  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3721  ; GFX7: bb.4:
3722  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3723  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3724  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3725  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3726  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3727  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3728  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3729  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3730  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3731  ; GFX7:   $vgpr0 = COPY [[COPY7]]
3732  ; GFX7:   $vgpr1 = COPY [[COPY8]]
3733  ; GFX7:   $vgpr2 = COPY [[COPY9]]
3734  ; GFX7:   $vgpr3 = COPY [[COPY10]]
3735  ; GFX7:   $vgpr4 = COPY [[COPY11]]
3736  ; GFX7:   $vgpr5 = COPY [[COPY12]]
3737  ; GFX7:   $vgpr6 = COPY [[COPY13]]
3738  ; GFX7:   $vgpr7 = COPY [[COPY14]]
3739  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3740  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
3741  ; GFX8: bb.1 (%ir-block.0):
3742  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3743  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3744  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3745  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3746  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3747  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3748  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3749  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
3750  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3751  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3752  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3753  ; GFX8: bb.2:
3754  ; GFX8:   successors: %bb.3, %bb.2
3755  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3756  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3757  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3758  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3759  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3760  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3761  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3762  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3763  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3764  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3765  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3766  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3767  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3768  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3769  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3770  ; GFX8: bb.3:
3771  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3772  ; GFX8: bb.4:
3773  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3774  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3775  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3776  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3777  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3778  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3779  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3780  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3781  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3782  ; GFX8:   $vgpr0 = COPY [[COPY7]]
3783  ; GFX8:   $vgpr1 = COPY [[COPY8]]
3784  ; GFX8:   $vgpr2 = COPY [[COPY9]]
3785  ; GFX8:   $vgpr3 = COPY [[COPY10]]
3786  ; GFX8:   $vgpr4 = COPY [[COPY11]]
3787  ; GFX8:   $vgpr5 = COPY [[COPY12]]
3788  ; GFX8:   $vgpr6 = COPY [[COPY13]]
3789  ; GFX8:   $vgpr7 = COPY [[COPY14]]
3790  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3791  %soffset = add i32 %offset.base, 5000
3792  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3793  ret <8 x float> %val
3794}
3795
; Test s.buffer.load of <8 x float> where both the resource descriptor and the
; offset live in VGPRs, with a constant 4076 added to the offset.
;
; Because the rsrc operand is in VGPRs, the checks below expect a loop (bb.2)
; that legalizes it for the scalar buffer instruction: each 64-bit half of the
; rsrc is read back with V_READFIRSTLANE_B32, compared against the original
; with V_CMP_EQ_U64, the two compares are combined with S_AND_B64, and
; S_AND_SAVEEXEC_B64 / S_XOR_B64_term / S_CBRANCH_EXECNZ re-run the loop for
; remaining lanes (commonly called a waterfall loop) before $exec is restored
; from the saved mask in bb.3.
;
; Offset folding differs by target:
;  - GFX6/GFX7 materialize the full 4076 into soffset (S_MOV_B32 4076) and use
;    immediate offsets 0 and 16 on the two DWORDX4 loads.
;  - GFX8 keeps only 12 in soffset (S_MOV_B32 12) and folds the rest into the
;    immediate offsets 4064 and 4080.
; The 32-byte result is loaded as two 16-byte BUFFER_LOAD_DWORDX4_OFFEN,
; assembled into a vreg_256, and returned in $vgpr0-$vgpr7.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY7]]
  ; GFX6:   $vgpr1 = COPY [[COPY8]]
  ; GFX6:   $vgpr2 = COPY [[COPY9]]
  ; GFX6:   $vgpr3 = COPY [[COPY10]]
  ; GFX6:   $vgpr4 = COPY [[COPY11]]
  ; GFX6:   $vgpr5 = COPY [[COPY12]]
  ; GFX6:   $vgpr6 = COPY [[COPY13]]
  ; GFX6:   $vgpr7 = COPY [[COPY14]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY7]]
  ; GFX7:   $vgpr1 = COPY [[COPY8]]
  ; GFX7:   $vgpr2 = COPY [[COPY9]]
  ; GFX7:   $vgpr3 = COPY [[COPY10]]
  ; GFX7:   $vgpr4 = COPY [[COPY11]]
  ; GFX7:   $vgpr5 = COPY [[COPY12]]
  ; GFX7:   $vgpr6 = COPY [[COPY13]]
  ; GFX7:   $vgpr7 = COPY [[COPY14]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY7]]
  ; GFX8:   $vgpr1 = COPY [[COPY8]]
  ; GFX8:   $vgpr2 = COPY [[COPY9]]
  ; GFX8:   $vgpr3 = COPY [[COPY10]]
  ; GFX8:   $vgpr4 = COPY [[COPY11]]
  ; GFX8:   $vgpr5 = COPY [[COPY12]]
  ; GFX8:   $vgpr6 = COPY [[COPY13]]
  ; GFX8:   $vgpr7 = COPY [[COPY14]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  %soffset = add i32 %offset.base, 4076
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret <8 x float> %val
}
3954
3955define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) {
3956  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
3957  ; GFX6: bb.1 (%ir-block.0):
3958  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
3959  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3960  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3961  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3962  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3963  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
3964  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3965  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
3966  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3967  ; GFX6:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3968  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
3969  ; GFX6: bb.2:
3970  ; GFX6:   successors: %bb.3, %bb.2
3971  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
3972  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
3973  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
3974  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
3975  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
3976  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
3977  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
3978  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
3979  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
3980  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3981  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3982  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
3983  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3984  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3985  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
3986  ; GFX6: bb.3:
3987  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
3988  ; GFX6: bb.4:
3989  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3990  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
3991  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
3992  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
3993  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
3994  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
3995  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
3996  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
3997  ; GFX6:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
3998  ; GFX6:   $vgpr0 = COPY [[COPY7]]
3999  ; GFX6:   $vgpr1 = COPY [[COPY8]]
4000  ; GFX6:   $vgpr2 = COPY [[COPY9]]
4001  ; GFX6:   $vgpr3 = COPY [[COPY10]]
4002  ; GFX6:   $vgpr4 = COPY [[COPY11]]
4003  ; GFX6:   $vgpr5 = COPY [[COPY12]]
4004  ; GFX6:   $vgpr6 = COPY [[COPY13]]
4005  ; GFX6:   $vgpr7 = COPY [[COPY14]]
4006  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4007  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4008  ; GFX7: bb.1 (%ir-block.0):
4009  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4010  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4011  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4012  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4013  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4014  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4015  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4016  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
4017  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4018  ; GFX7:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4019  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
4020  ; GFX7: bb.2:
4021  ; GFX7:   successors: %bb.3, %bb.2
4022  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4023  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4024  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4025  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
4026  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
4027  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
4028  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4029  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
4030  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4031  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4032  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4033  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4034  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4035  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4036  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4037  ; GFX7: bb.3:
4038  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4039  ; GFX7: bb.4:
4040  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4041  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4042  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4043  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4044  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4045  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4046  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4047  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4048  ; GFX7:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4049  ; GFX7:   $vgpr0 = COPY [[COPY7]]
4050  ; GFX7:   $vgpr1 = COPY [[COPY8]]
4051  ; GFX7:   $vgpr2 = COPY [[COPY9]]
4052  ; GFX7:   $vgpr3 = COPY [[COPY10]]
4053  ; GFX7:   $vgpr4 = COPY [[COPY11]]
4054  ; GFX7:   $vgpr5 = COPY [[COPY12]]
4055  ; GFX7:   $vgpr6 = COPY [[COPY13]]
4056  ; GFX7:   $vgpr7 = COPY [[COPY14]]
4057  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4058  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4059  ; GFX8: bb.1 (%ir-block.0):
4060  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4061  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4062  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4063  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4064  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4065  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4066  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4067  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
4068  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4069  ; GFX8:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4070  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
4071  ; GFX8: bb.2:
4072  ; GFX8:   successors: %bb.3, %bb.2
4073  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
4074  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
4075  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
4076  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
4077  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
4078  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
4079  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
4080  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
4081  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
4082  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4083  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4084  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
4085  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4086  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4087  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
4088  ; GFX8: bb.3:
4089  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
4090  ; GFX8: bb.4:
4091  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4092  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
4093  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
4094  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
4095  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
4096  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
4097  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
4098  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
4099  ; GFX8:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
4100  ; GFX8:   $vgpr0 = COPY [[COPY7]]
4101  ; GFX8:   $vgpr1 = COPY [[COPY8]]
4102  ; GFX8:   $vgpr2 = COPY [[COPY9]]
4103  ; GFX8:   $vgpr3 = COPY [[COPY10]]
4104  ; GFX8:   $vgpr4 = COPY [[COPY11]]
4105  ; GFX8:   $vgpr5 = COPY [[COPY12]]
4106  ; GFX8:   $vgpr6 = COPY [[COPY13]]
4107  ; GFX8:   $vgpr7 = COPY [[COPY14]]
4108  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4109  %soffset = add i32 %offset.base, 4080
4110  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4111  ret <8 x float> %val
4112}
4113
; NOTE(review): comments added only; the check lines below are autogenerated by
; utils/update_mir_test_checks.py and should be regenerated, not hand-edited.
; The resource descriptor arrives in VGPRs, so instruction selection builds a
; waterfall loop (bb.2): each 64-bit half of the descriptor is made uniform via
; V_READFIRSTLANE_B32, compared against the lane values with V_CMP_EQ_U64, and
; the loop repeats under S_AND_SAVEEXEC_B64 until all active lanes agree.
; The constant 4064 offset folds into the instruction immediate offsets of the
; two 16-byte halves (4064 and 4080) of the 32-byte result.
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) {
  ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX6:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX6:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX6:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX6:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX6:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX6: bb.2:
  ; GFX6:   successors: %bb.3, %bb.2
  ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX6:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX6:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX6:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX6:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX6:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
  ; GFX6:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
  ; GFX6:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX6:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX6:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX6: bb.3:
  ; GFX6:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX6: bb.4:
  ; GFX6:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX6:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX6:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX6:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX6:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX6:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX6:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX6:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX6:   $vgpr0 = COPY [[COPY6]]
  ; GFX6:   $vgpr1 = COPY [[COPY7]]
  ; GFX6:   $vgpr2 = COPY [[COPY8]]
  ; GFX6:   $vgpr3 = COPY [[COPY9]]
  ; GFX6:   $vgpr4 = COPY [[COPY10]]
  ; GFX6:   $vgpr5 = COPY [[COPY11]]
  ; GFX6:   $vgpr6 = COPY [[COPY12]]
  ; GFX6:   $vgpr7 = COPY [[COPY13]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX7:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX7:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX7:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX7:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX7:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX7: bb.2:
  ; GFX7:   successors: %bb.3, %bb.2
  ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX7:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX7:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX7:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX7:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX7:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
  ; GFX7:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
  ; GFX7:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX7:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX7:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX7: bb.3:
  ; GFX7:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX7: bb.4:
  ; GFX7:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX7:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX7:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX7:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX7:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX7:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX7:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX7:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX7:   $vgpr0 = COPY [[COPY6]]
  ; GFX7:   $vgpr1 = COPY [[COPY7]]
  ; GFX7:   $vgpr2 = COPY [[COPY8]]
  ; GFX7:   $vgpr3 = COPY [[COPY9]]
  ; GFX7:   $vgpr4 = COPY [[COPY10]]
  ; GFX7:   $vgpr5 = COPY [[COPY11]]
  ; GFX7:   $vgpr6 = COPY [[COPY12]]
  ; GFX7:   $vgpr7 = COPY [[COPY13]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GFX8: bb.2:
  ; GFX8:   successors: %bb.3, %bb.2
  ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
  ; GFX8:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
  ; GFX8:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX8:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
  ; GFX8:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GFX8:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
  ; GFX8:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 + 4064, align 4)
  ; GFX8:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GFX8: bb.3:
  ; GFX8:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GFX8: bb.4:
  ; GFX8:   [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
  ; GFX8:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
  ; GFX8:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
  ; GFX8:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
  ; GFX8:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
  ; GFX8:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
  ; GFX8:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
  ; GFX8:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
  ; GFX8:   $vgpr0 = COPY [[COPY6]]
  ; GFX8:   $vgpr1 = COPY [[COPY7]]
  ; GFX8:   $vgpr2 = COPY [[COPY8]]
  ; GFX8:   $vgpr3 = COPY [[COPY9]]
  ; GFX8:   $vgpr4 = COPY [[COPY10]]
  ; GFX8:   $vgpr5 = COPY [[COPY11]]
  ; GFX8:   $vgpr6 = COPY [[COPY12]]
  ; GFX8:   $vgpr7 = COPY [[COPY13]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
  ; The %offset.base argument is intentionally unused; the soffset operand of
  ; the intrinsic call is the constant 4064.
  %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
  ret <8 x float> %val
}
4268
; NOTE(review): comments added only; the check lines below are autogenerated by
; utils/update_mir_test_checks.py and should be regenerated, not hand-edited.
; A (VGPR + SGPR) offset add needs no explicit ALU instruction: the VGPR part
; becomes the voffset and the SGPR part the soffset operand of
; BUFFER_LOAD_DWORD_OFFEN.
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset = add i32 %offset.v, %offset.s
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4313
; NOTE(review): comments added only; the check lines below are autogenerated by
; utils/update_mir_test_checks.py and should be regenerated, not hand-edited.
; Same as s_buffer_load_f32_offset_add_vgpr_sgpr but with the add operands
; commuted (SGPR + VGPR); selection is identical — VGPR as voffset, SGPR as
; soffset, no explicit add emitted.
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset = add i32 %offset.s, %offset.v
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4358
; NOTE(review): comments added only; the check lines below are autogenerated by
; utils/update_mir_test_checks.py and should be regenerated, not hand-edited.
; (VGPR + SGPR) + 1024: the SGPR addend is copied into a VGPR and added with
; V_ADD_CO_U32_e64, the 1024 folds into the instruction's immediate offset,
; and soffset is the constant 0.
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset.base = add i32 %offset.v, %offset.s
  %offset = add i32 %offset.base, 1024
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4413
; NOTE(review): comments added only; the check lines below are autogenerated by
; utils/update_mir_test_checks.py and should be regenerated, not hand-edited.
; Same as s_buffer_load_f32_offset_add_vgpr_sgpr_imm but with the base add
; operands commuted (SGPR + VGPR), which shows up only in the operand order of
; the V_ADD_CO_U32_e64; the 1024 still folds into the immediate offset.
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX6:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX7:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8:   %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %offset.base = add i32 %offset.s, %offset.v
  %offset = add i32 %offset.base, 1024
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4468
4469; TODO: Ideally this would be reassociated to fold.
; (%offset.s + 1024) + %offset.v: the uniform SGPR+imm add is selected as
; S_ADD_I32 and folded into the scalar SOffset operand, while the divergent
; %offset.v becomes the VOffset of BUFFER_LOAD_DWORD_OFFEN (see the GFX6/7/8
; check lines below, which are autogenerated -- regenerate with
; update_mir_test_checks.py rather than hand-editing them).
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX7:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX8:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; Uniform part first: (sgpr + imm), then add the divergent vgpr offset.
  %offset.base = add i32 %offset.s, 1024
  %offset = add i32 %offset.base, %offset.v
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4521
; (%offset.v + 1024) + %offset.s: because the inner add is divergent, the
; 1024 immediate is materialized via S_MOV_B32, copied into a VGPR and added
; with V_ADD_CO_U32_e64; it is NOT folded into the instruction's immediate
; offset field.  Only the outer uniform %offset.s lands in SOffset of
; BUFFER_LOAD_DWORD_OFFEN (checks autogenerated by update_mir_test_checks.py
; -- regenerate rather than hand-edit).
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
  ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
  ; GFX6: bb.1 (%ir-block.0):
  ; GFX6:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX6:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX6:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX6:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX6:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX6:   %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX6:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX6:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX6:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
  ; GFX7: bb.1 (%ir-block.0):
  ; GFX7:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX7:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX7:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX7:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX7:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX7:   %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX7:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX7:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX7:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
  ; GFX8:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX8:   %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
  ; GFX8:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
  ; GFX8:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
  ; GFX8:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ; Divergent part first: (vgpr + imm), then add the uniform sgpr offset.
  %offset.base = add i32 %offset.v, 1024
  %offset = add i32 %offset.base, %offset.s
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
  ret float %val
}
4576
; Overloads of the llvm.amdgcn.s.buffer.load intrinsic exercised above.
; Signature for all: (<4 x i32> %rsrc, i32 %offset, i32 immarg %cachepolicy).
; Inconsistent run of spaces in the f32 declaration normalized to a single
; space to match the rest of the block (whitespace is insignificant in IR).

; Integer scalar/vector results.
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)

; Floating-point scalar/vector results.
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)
declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)

; Wide integer results.
declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)

; 16-bit element vectors.
declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)

; 64-bit element vectors.
declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)

; Pointer-element vectors (global address space).
declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg)
declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg)
4603