1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4
5; Test end-to-end matching of addressing modes when MUBUF instructions are
6; used for global memory accesses.
7
; Store i32 0 through an inreg (SGPR) global pointer with no offset.
; Expected: the pointer in s[2:3] is copied into the low half of the buffer
; resource s[0:3]; the store uses no VGPR address ("off") and a zero soffset.
8define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) {
9; GFX6-LABEL: mubuf_store_sgpr_ptr:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_mov_b32 s0, s2
12; GFX6-NEXT:    s_mov_b32 s1, s3
13; GFX6-NEXT:    v_mov_b32_e32 v0, 0
14; GFX6-NEXT:    s_mov_b32 s2, -1
15; GFX6-NEXT:    s_mov_b32 s3, 0xf000
16; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
17; GFX6-NEXT:    s_endpgm
18;
19; GFX7-LABEL: mubuf_store_sgpr_ptr:
20; GFX7:       ; %bb.0:
21; GFX7-NEXT:    s_mov_b32 s0, s2
22; GFX7-NEXT:    s_mov_b32 s1, s3
23; GFX7-NEXT:    v_mov_b32_e32 v0, 0
24; GFX7-NEXT:    s_mov_b32 s2, -1
25; GFX7-NEXT:    s_mov_b32 s3, 0xf000
26; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
27; GFX7-NEXT:    s_endpgm
28  store i32 0, i32 addrspace(1)* %ptr
29  ret void
30}
31
; Store i32 0 at an inreg (SGPR) pointer plus a constant index of 4095
; elements (4095 * 4 = 0x3ffc bytes).
; Expected: the byte offset is materialized in s4 and passed as the soffset
; operand; no VGPR address is used.
; NOTE(review): presumably 0x3ffc is too large for the MUBUF immediate offset
; field, hence the SGPR soffset -- confirm against the ISA manual.
32define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
33; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095:
34; GFX6:       ; %bb.0:
35; GFX6-NEXT:    s_mov_b32 s0, s2
36; GFX6-NEXT:    s_mov_b32 s1, s3
37; GFX6-NEXT:    v_mov_b32_e32 v0, 0
38; GFX6-NEXT:    s_mov_b32 s2, -1
39; GFX6-NEXT:    s_mov_b32 s3, 0xf000
40; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
41; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], s4
42; GFX6-NEXT:    s_endpgm
43;
44; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095:
45; GFX7:       ; %bb.0:
46; GFX7-NEXT:    s_mov_b32 s0, s2
47; GFX7-NEXT:    s_mov_b32 s1, s3
48; GFX7-NEXT:    v_mov_b32_e32 v0, 0
49; GFX7-NEXT:    s_mov_b32 s2, -1
50; GFX7-NEXT:    s_mov_b32 s3, 0xf000
51; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
52; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s4
53; GFX7-NEXT:    s_endpgm
54  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
55  store i32 0, i32 addrspace(1)* %gep
56  ret void
57}
58
; Store i32 0 at an inreg (SGPR) pointer plus a constant index of 2^32
; elements (byte offset 2^34: low dword 0, high dword 4).
; Expected: the 64-bit byte offset is built in s[4:5], copied to v[0:1] and
; used as the addr64 VGPR address; the pointer goes into resource base s[0:1].
59define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
60; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
61; GFX6:       ; %bb.0:
62; GFX6-NEXT:    s_mov_b32 s4, 0
63; GFX6-NEXT:    s_mov_b32 s5, 4
64; GFX6-NEXT:    v_mov_b32_e32 v0, s4
65; GFX6-NEXT:    s_mov_b32 s0, s2
66; GFX6-NEXT:    s_mov_b32 s1, s3
67; GFX6-NEXT:    v_mov_b32_e32 v2, 0
68; GFX6-NEXT:    s_mov_b32 s3, 0xf000
69; GFX6-NEXT:    s_mov_b32 s2, s4
70; GFX6-NEXT:    v_mov_b32_e32 v1, s5
71; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
72; GFX6-NEXT:    s_endpgm
73;
74; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
75; GFX7:       ; %bb.0:
76; GFX7-NEXT:    s_mov_b32 s4, 0
77; GFX7-NEXT:    s_mov_b32 s5, 4
78; GFX7-NEXT:    v_mov_b32_e32 v0, s4
79; GFX7-NEXT:    s_mov_b32 s0, s2
80; GFX7-NEXT:    s_mov_b32 s1, s3
81; GFX7-NEXT:    v_mov_b32_e32 v2, 0
82; GFX7-NEXT:    s_mov_b32 s3, 0xf000
83; GFX7-NEXT:    s_mov_b32 s2, s4
84; GFX7-NEXT:    v_mov_b32_e32 v1, s5
85; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
86; GFX7-NEXT:    s_endpgm
87  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
88  store i32 0, i32 addrspace(1)* %gep
89  ret void
90}
91
; Store i32 0 at an inreg (SGPR) pointer plus a constant index of 2^32 + 1
; elements (byte offset 2^34 + 4: low dword 4, high dword 4).
; Expected: the 64-bit byte offset is built in s[4:5] (s5 copied from s4),
; moved to v[0:1] and used as the addr64 VGPR address; the pointer goes into
; resource base s[0:1].
92define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) {
93; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
94; GFX6:       ; %bb.0:
95; GFX6-NEXT:    s_mov_b32 s4, 4
96; GFX6-NEXT:    s_mov_b32 s5, s4
97; GFX6-NEXT:    v_mov_b32_e32 v0, s4
98; GFX6-NEXT:    s_mov_b32 s0, s2
99; GFX6-NEXT:    s_mov_b32 s1, s3
100; GFX6-NEXT:    s_mov_b32 s2, 0
101; GFX6-NEXT:    v_mov_b32_e32 v2, 0
102; GFX6-NEXT:    s_mov_b32 s3, 0xf000
103; GFX6-NEXT:    v_mov_b32_e32 v1, s5
104; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
105; GFX6-NEXT:    s_endpgm
106;
107; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
108; GFX7:       ; %bb.0:
109; GFX7-NEXT:    s_mov_b32 s4, 4
110; GFX7-NEXT:    s_mov_b32 s5, s4
111; GFX7-NEXT:    v_mov_b32_e32 v0, s4
112; GFX7-NEXT:    s_mov_b32 s0, s2
113; GFX7-NEXT:    s_mov_b32 s1, s3
114; GFX7-NEXT:    s_mov_b32 s2, 0
115; GFX7-NEXT:    v_mov_b32_e32 v2, 0
116; GFX7-NEXT:    s_mov_b32 s3, 0xf000
117; GFX7-NEXT:    v_mov_b32_e32 v1, s5
118; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
119; GFX7-NEXT:    s_endpgm
120  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
121  store i32 0, i32 addrspace(1)* %gep
122  ret void
123}
124
; Store i32 0 at an inreg (SGPR) pointer plus a constant index of 4096
; elements (4096 * 4 = 0x4000 bytes).
; Expected: the byte offset is materialized in s4 and passed as the soffset
; operand; no VGPR address is used.
125define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) {
126; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096:
127; GFX6:       ; %bb.0:
128; GFX6-NEXT:    s_mov_b32 s0, s2
129; GFX6-NEXT:    s_mov_b32 s1, s3
130; GFX6-NEXT:    v_mov_b32_e32 v0, 0
131; GFX6-NEXT:    s_mov_b32 s2, -1
132; GFX6-NEXT:    s_mov_b32 s3, 0xf000
133; GFX6-NEXT:    s_movk_i32 s4, 0x4000
134; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], s4
135; GFX6-NEXT:    s_endpgm
136;
137; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4096:
138; GFX7:       ; %bb.0:
139; GFX7-NEXT:    s_mov_b32 s0, s2
140; GFX7-NEXT:    s_mov_b32 s1, s3
141; GFX7-NEXT:    v_mov_b32_e32 v0, 0
142; GFX7-NEXT:    s_mov_b32 s2, -1
143; GFX7-NEXT:    s_mov_b32 s3, 0xf000
144; GFX7-NEXT:    s_movk_i32 s4, 0x4000
145; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s4
146; GFX7-NEXT:    s_endpgm
147  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
148  store i32 0, i32 addrspace(1)* %gep
149  ret void
150}
151
; Store i32 0 at a non-inreg (VGPR) pointer plus a constant index of 4095
; elements (0x3ffc bytes).
; Expected: the pointer in v[0:1] is the addr64 address, the resource base
; s[0:1] is zero, and the byte offset is passed in s4 as the soffset operand.
152define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
153; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095:
154; GFX6:       ; %bb.0:
155; GFX6-NEXT:    s_mov_b32 s2, 0
156; GFX6-NEXT:    v_mov_b32_e32 v2, 0
157; GFX6-NEXT:    s_mov_b32 s3, 0xf000
158; GFX6-NEXT:    s_mov_b64 s[0:1], 0
159; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
160; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
161; GFX6-NEXT:    s_endpgm
162;
163; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4095:
164; GFX7:       ; %bb.0:
165; GFX7-NEXT:    s_mov_b32 s2, 0
166; GFX7-NEXT:    v_mov_b32_e32 v2, 0
167; GFX7-NEXT:    s_mov_b32 s3, 0xf000
168; GFX7-NEXT:    s_mov_b64 s[0:1], 0
169; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
170; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
171; GFX7-NEXT:    s_endpgm
172  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
173  store i32 0, i32 addrspace(1)* %gep
174  ret void
175}
176
; Store i32 0 at a non-inreg (VGPR) pointer plus a constant index of 2^32
; elements (byte offset 2^34: low dword 0, high dword 4).
; Expected: the 64-bit byte offset is placed in the resource base s[0:1] and
; the pointer in v[0:1] is used as the addr64 address.
177define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
178; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
179; GFX6:       ; %bb.0:
180; GFX6-NEXT:    s_mov_b32 s0, 0
181; GFX6-NEXT:    s_mov_b32 s1, 4
182; GFX6-NEXT:    v_mov_b32_e32 v2, 0
183; GFX6-NEXT:    s_mov_b32 s3, 0xf000
184; GFX6-NEXT:    s_mov_b32 s2, s0
185; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
186; GFX6-NEXT:    s_endpgm
187;
188; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
189; GFX7:       ; %bb.0:
190; GFX7-NEXT:    s_mov_b32 s0, 0
191; GFX7-NEXT:    s_mov_b32 s1, 4
192; GFX7-NEXT:    v_mov_b32_e32 v2, 0
193; GFX7-NEXT:    s_mov_b32 s3, 0xf000
194; GFX7-NEXT:    s_mov_b32 s2, s0
195; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
196; GFX7-NEXT:    s_endpgm
197  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
198  store i32 0, i32 addrspace(1)* %gep
199  ret void
200}
201
; Store i32 0 at a non-inreg (VGPR) pointer plus a constant index of 2^32 + 1
; elements (byte offset 2^34 + 4: low dword 4, high dword 4).
; Expected: the 64-bit byte offset is placed in the resource base s[0:1]
; (s1 copied from s0) and the pointer in v[0:1] is used as the addr64 address.
202define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) {
203; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
204; GFX6:       ; %bb.0:
205; GFX6-NEXT:    s_mov_b32 s0, 4
206; GFX6-NEXT:    s_mov_b32 s1, s0
207; GFX6-NEXT:    s_mov_b32 s2, 0
208; GFX6-NEXT:    v_mov_b32_e32 v2, 0
209; GFX6-NEXT:    s_mov_b32 s3, 0xf000
210; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
211; GFX6-NEXT:    s_endpgm
212;
213; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
214; GFX7:       ; %bb.0:
215; GFX7-NEXT:    s_mov_b32 s0, 4
216; GFX7-NEXT:    s_mov_b32 s1, s0
217; GFX7-NEXT:    s_mov_b32 s2, 0
218; GFX7-NEXT:    v_mov_b32_e32 v2, 0
219; GFX7-NEXT:    s_mov_b32 s3, 0xf000
220; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
221; GFX7-NEXT:    s_endpgm
222  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
223  store i32 0, i32 addrspace(1)* %gep
224  ret void
225}
226
; Store i32 0 at a non-inreg (VGPR) pointer plus a constant index of 4096
; elements (0x4000 bytes).
; Expected: the pointer in v[0:1] is the addr64 address, the resource base
; s[0:1] is zero, and the byte offset is passed in s4 as the soffset operand.
227define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) {
228; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096:
229; GFX6:       ; %bb.0:
230; GFX6-NEXT:    s_mov_b32 s2, 0
231; GFX6-NEXT:    v_mov_b32_e32 v2, 0
232; GFX6-NEXT:    s_mov_b32 s3, 0xf000
233; GFX6-NEXT:    s_mov_b64 s[0:1], 0
234; GFX6-NEXT:    s_movk_i32 s4, 0x4000
235; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
236; GFX6-NEXT:    s_endpgm
237;
238; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4096:
239; GFX7:       ; %bb.0:
240; GFX7-NEXT:    s_mov_b32 s2, 0
241; GFX7-NEXT:    v_mov_b32_e32 v2, 0
242; GFX7-NEXT:    s_mov_b32 s3, 0xf000
243; GFX7-NEXT:    s_mov_b64 s[0:1], 0
244; GFX7-NEXT:    s_movk_i32 s4, 0x4000
245; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
246; GFX7-NEXT:    s_endpgm
247  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
248  store i32 0, i32 addrspace(1)* %gep
249  ret void
250}
251
; Store i32 0 at an inreg (SGPR) pointer indexed by an inreg (SGPR) i32.
; Expected: the index is sign-extended to 64 bits (s_bfe_i64) and scaled by 4
; (s_lshl_b64 by 2) on the scalar unit, then copied to v[0:1] and used as the
; addr64 address; the pointer goes into resource base s[0:1].
252define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) {
253; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
254; GFX6:       ; %bb.0:
255; GFX6-NEXT:    s_mov_b32 s0, s2
256; GFX6-NEXT:    s_mov_b32 s1, s3
257; GFX6-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
258; GFX6-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
259; GFX6-NEXT:    v_mov_b32_e32 v0, s4
260; GFX6-NEXT:    s_mov_b32 s2, 0
261; GFX6-NEXT:    v_mov_b32_e32 v2, 0
262; GFX6-NEXT:    s_mov_b32 s3, 0xf000
263; GFX6-NEXT:    v_mov_b32_e32 v1, s5
264; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
265; GFX6-NEXT:    s_endpgm
266;
267; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
268; GFX7:       ; %bb.0:
269; GFX7-NEXT:    s_mov_b32 s0, s2
270; GFX7-NEXT:    s_mov_b32 s1, s3
271; GFX7-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
272; GFX7-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
273; GFX7-NEXT:    v_mov_b32_e32 v0, s4
274; GFX7-NEXT:    s_mov_b32 s2, 0
275; GFX7-NEXT:    v_mov_b32_e32 v2, 0
276; GFX7-NEXT:    s_mov_b32 s3, 0xf000
277; GFX7-NEXT:    v_mov_b32_e32 v1, s5
278; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
279; GFX7-NEXT:    s_endpgm
280  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
281  store i32 0, i32 addrspace(1)* %gep
282  ret void
283}
284
; Store i32 0 at a non-inreg (VGPR) pointer indexed by an inreg (SGPR) i32.
; Expected: the index is sign-extended (s_bfe_i64) and scaled by 4
; (s_lshl_b64 by 2) directly into the resource base s[0:1]; the pointer in
; v[0:1] is used as the addr64 address.
285define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
286; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
287; GFX6:       ; %bb.0:
288; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
289; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
290; GFX6-NEXT:    s_mov_b32 s2, 0
291; GFX6-NEXT:    v_mov_b32_e32 v2, 0
292; GFX6-NEXT:    s_mov_b32 s3, 0xf000
293; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
294; GFX6-NEXT:    s_endpgm
295;
296; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
297; GFX7:       ; %bb.0:
298; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
299; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
300; GFX7-NEXT:    s_mov_b32 s2, 0
301; GFX7-NEXT:    v_mov_b32_e32 v2, 0
302; GFX7-NEXT:    s_mov_b32 s3, 0xf000
303; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
304; GFX7-NEXT:    s_endpgm
305  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
306  store i32 0, i32 addrspace(1)* %gep
307  ret void
308}
309
; Store i32 0 at a VGPR pointer indexed first by an inreg (SGPR) i32 and then
; by a constant 256 elements (1024 bytes).
; Expected: the scaled, sign-extended index goes into the resource base
; s[0:1] and the trailing constant is folded into the instruction's immediate
; field (offset:1024).
310define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
311; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
312; GFX6:       ; %bb.0:
313; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
314; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
315; GFX6-NEXT:    s_mov_b32 s2, 0
316; GFX6-NEXT:    v_mov_b32_e32 v2, 0
317; GFX6-NEXT:    s_mov_b32 s3, 0xf000
318; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
319; GFX6-NEXT:    s_endpgm
320;
321; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
322; GFX7:       ; %bb.0:
323; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
324; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
325; GFX7-NEXT:    s_mov_b32 s2, 0
326; GFX7-NEXT:    v_mov_b32_e32 v2, 0
327; GFX7-NEXT:    s_mov_b32 s3, 0xf000
328; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
329; GFX7-NEXT:    s_endpgm
330  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
331  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256
332  store i32 0, i32 addrspace(1)* %gep1
333  ret void
334}
335
; Store i32 0 at a VGPR pointer indexed first by a constant 256 elements
; (0x400 bytes) and then by an inreg (SGPR) i32.
; Expected: with the constant applied first it is not folded into the
; immediate offset field; it is added to the pointer in v[0:1] with a 64-bit
; VALU add (v_add_i32 / v_addc_u32), and the scaled, sign-extended index goes
; into the resource base s[0:1].
336define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
337; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
338; GFX6:       ; %bb.0:
339; GFX6-NEXT:    s_movk_i32 s4, 0x400
340; GFX6-NEXT:    s_mov_b32 s5, 0
341; GFX6-NEXT:    v_mov_b32_e32 v2, s4
342; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
343; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
344; GFX6-NEXT:    v_mov_b32_e32 v3, s5
345; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
346; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
347; GFX6-NEXT:    v_mov_b32_e32 v2, 0
348; GFX6-NEXT:    s_mov_b32 s3, 0xf000
349; GFX6-NEXT:    s_mov_b32 s2, s5
350; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
351; GFX6-NEXT:    s_endpgm
352;
353; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
354; GFX7:       ; %bb.0:
355; GFX7-NEXT:    s_movk_i32 s4, 0x400
356; GFX7-NEXT:    s_mov_b32 s5, 0
357; GFX7-NEXT:    v_mov_b32_e32 v2, s4
358; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
359; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
360; GFX7-NEXT:    v_mov_b32_e32 v3, s5
361; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
362; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
363; GFX7-NEXT:    v_mov_b32_e32 v2, 0
364; GFX7-NEXT:    s_mov_b32 s3, 0xf000
365; GFX7-NEXT:    s_mov_b32 s2, s5
366; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
367; GFX7-NEXT:    s_endpgm
368  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
369  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
370  store i32 0, i32 addrspace(1)* %gep1
371  ret void
372}
373
; Store i32 0 at an inreg (SGPR) pointer indexed by a non-inreg (VGPR) i32.
; Expected: the index is sign-extended (v_ashrrev_i32 by 31 for the high
; dword) and scaled by 4 (v_lshl_b64 by 2) in v[0:1], which becomes the addr64
; address; the pointer goes into resource base s[0:1].
374define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
375; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
376; GFX6:       ; %bb.0:
377; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
378; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
379; GFX6-NEXT:    s_mov_b32 s0, s2
380; GFX6-NEXT:    s_mov_b32 s1, s3
381; GFX6-NEXT:    s_mov_b32 s2, 0
382; GFX6-NEXT:    v_mov_b32_e32 v2, 0
383; GFX6-NEXT:    s_mov_b32 s3, 0xf000
384; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
385; GFX6-NEXT:    s_endpgm
386;
387; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
388; GFX7:       ; %bb.0:
389; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
390; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
391; GFX7-NEXT:    s_mov_b32 s0, s2
392; GFX7-NEXT:    s_mov_b32 s1, s3
393; GFX7-NEXT:    s_mov_b32 s2, 0
394; GFX7-NEXT:    v_mov_b32_e32 v2, 0
395; GFX7-NEXT:    s_mov_b32 s3, 0xf000
396; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
397; GFX7-NEXT:    s_endpgm
398  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
399  store i32 0, i32 addrspace(1)* %gep
400  ret void
401}
402
; Store i32 0 at an inreg (SGPR) pointer indexed first by a VGPR i32 and then
; by a constant 4095 elements (0x3ffc bytes).
; Expected: the scaled, sign-extended index in v[0:1] is the addr64 address,
; the pointer is the resource base s[0:1], and the trailing constant is passed
; in s4 as the soffset operand.
403define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
404; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
405; GFX6:       ; %bb.0:
406; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
407; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
408; GFX6-NEXT:    s_mov_b32 s0, s2
409; GFX6-NEXT:    s_mov_b32 s1, s3
410; GFX6-NEXT:    s_mov_b32 s2, 0
411; GFX6-NEXT:    v_mov_b32_e32 v2, 0
412; GFX6-NEXT:    s_mov_b32 s3, 0xf000
413; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
414; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
415; GFX6-NEXT:    s_endpgm
416;
417; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
418; GFX7:       ; %bb.0:
419; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
420; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
421; GFX7-NEXT:    s_mov_b32 s0, s2
422; GFX7-NEXT:    s_mov_b32 s1, s3
423; GFX7-NEXT:    s_mov_b32 s2, 0
424; GFX7-NEXT:    v_mov_b32_e32 v2, 0
425; GFX7-NEXT:    s_mov_b32 s3, 0xf000
426; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
427; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
428; GFX7-NEXT:    s_endpgm
429  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
430  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095
431  store i32 0, i32 addrspace(1)* %gep1
432  ret void
433}
; Store i32 0 at an inreg (SGPR) pointer indexed first by a constant 4095
; elements (0x3ffc bytes) and then by a VGPR i32.
; Expected: with the constant applied first it is added to the pointer on the
; scalar unit (s_add_u32 / s_addc_u32) to form the resource base s[0:1]; the
; scaled, sign-extended index in v[0:1] is the addr64 address and soffset is 0.
434define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
435; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
436; GFX6:       ; %bb.0:
437; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
438; GFX6-NEXT:    s_add_u32 s0, s2, 0x3ffc
439; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
440; GFX6-NEXT:    s_addc_u32 s1, s3, 0
441; GFX6-NEXT:    s_mov_b32 s2, 0
442; GFX6-NEXT:    v_mov_b32_e32 v2, 0
443; GFX6-NEXT:    s_mov_b32 s3, 0xf000
444; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
445; GFX6-NEXT:    s_endpgm
446;
447; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
448; GFX7:       ; %bb.0:
449; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
450; GFX7-NEXT:    s_add_u32 s0, s2, 0x3ffc
451; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
452; GFX7-NEXT:    s_addc_u32 s1, s3, 0
453; GFX7-NEXT:    s_mov_b32 s2, 0
454; GFX7-NEXT:    v_mov_b32_e32 v2, 0
455; GFX7-NEXT:    s_mov_b32 s3, 0xf000
456; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
457; GFX7-NEXT:    s_endpgm
458  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
459  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
460  store i32 0, i32 addrspace(1)* %gep1
461  ret void
462}
463
; Volatile float load through an inreg (SGPR) global pointer with no offset.
; Expected: the pointer in s[2:3] is copied into the low half of the buffer
; resource s[0:3]; the load uses no VGPR address ("off") and a zero soffset,
; and the result is returned in v0 after waiting on vmcnt(0).
464define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) {
465; GFX6-LABEL: mubuf_load_sgpr_ptr:
466; GFX6:       ; %bb.0:
467; GFX6-NEXT:    s_mov_b32 s0, s2
468; GFX6-NEXT:    s_mov_b32 s1, s3
469; GFX6-NEXT:    s_mov_b32 s2, -1
470; GFX6-NEXT:    s_mov_b32 s3, 0xf000
471; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0
472; GFX6-NEXT:    s_waitcnt vmcnt(0)
473; GFX6-NEXT:    ; return to shader part epilog
474;
475; GFX7-LABEL: mubuf_load_sgpr_ptr:
476; GFX7:       ; %bb.0:
477; GFX7-NEXT:    s_mov_b32 s0, s2
478; GFX7-NEXT:    s_mov_b32 s1, s3
479; GFX7-NEXT:    s_mov_b32 s2, -1
480; GFX7-NEXT:    s_mov_b32 s3, 0xf000
481; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], 0
482; GFX7-NEXT:    s_waitcnt vmcnt(0)
483; GFX7-NEXT:    ; return to shader part epilog
484  %val = load volatile float, float addrspace(1)* %ptr
485  ret float %val
486}
487
; Volatile float load from an inreg (SGPR) pointer plus a constant index of
; 4095 elements (4095 * 4 = 0x3ffc bytes).
; Expected: the byte offset is materialized in s4 and passed as the soffset
; operand; no VGPR address is used.
488define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) {
489; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095:
490; GFX6:       ; %bb.0:
491; GFX6-NEXT:    s_mov_b32 s0, s2
492; GFX6-NEXT:    s_mov_b32 s1, s3
493; GFX6-NEXT:    s_mov_b32 s2, -1
494; GFX6-NEXT:    s_mov_b32 s3, 0xf000
495; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
496; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], s4
497; GFX6-NEXT:    s_waitcnt vmcnt(0)
498; GFX6-NEXT:    ; return to shader part epilog
499;
500; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095:
501; GFX7:       ; %bb.0:
502; GFX7-NEXT:    s_mov_b32 s0, s2
503; GFX7-NEXT:    s_mov_b32 s1, s3
504; GFX7-NEXT:    s_mov_b32 s2, -1
505; GFX7-NEXT:    s_mov_b32 s3, 0xf000
506; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
507; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s4
508; GFX7-NEXT:    s_waitcnt vmcnt(0)
509; GFX7-NEXT:    ; return to shader part epilog
510  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
511  %val = load volatile float, float addrspace(1)* %gep
512  ret float %val
513}
514
; Volatile float load from an inreg (SGPR) pointer plus a constant index of
; 2^32 elements (byte offset 2^34: low dword 0, high dword 4).
; Expected: the 64-bit byte offset is built in s[4:5], copied to v[0:1] and
; used as the addr64 VGPR address; the pointer goes into resource base s[0:1].
515define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) {
516; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
517; GFX6:       ; %bb.0:
518; GFX6-NEXT:    s_mov_b32 s4, 0
519; GFX6-NEXT:    s_mov_b32 s5, 4
520; GFX6-NEXT:    v_mov_b32_e32 v0, s4
521; GFX6-NEXT:    s_mov_b32 s0, s2
522; GFX6-NEXT:    s_mov_b32 s1, s3
523; GFX6-NEXT:    s_mov_b32 s3, 0xf000
524; GFX6-NEXT:    s_mov_b32 s2, s4
525; GFX6-NEXT:    v_mov_b32_e32 v1, s5
526; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
527; GFX6-NEXT:    s_waitcnt vmcnt(0)
528; GFX6-NEXT:    ; return to shader part epilog
529;
530; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
531; GFX7:       ; %bb.0:
532; GFX7-NEXT:    s_mov_b32 s4, 0
533; GFX7-NEXT:    s_mov_b32 s5, 4
534; GFX7-NEXT:    v_mov_b32_e32 v0, s4
535; GFX7-NEXT:    s_mov_b32 s0, s2
536; GFX7-NEXT:    s_mov_b32 s1, s3
537; GFX7-NEXT:    s_mov_b32 s3, 0xf000
538; GFX7-NEXT:    s_mov_b32 s2, s4
539; GFX7-NEXT:    v_mov_b32_e32 v1, s5
540; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
541; GFX7-NEXT:    s_waitcnt vmcnt(0)
542; GFX7-NEXT:    ; return to shader part epilog
543  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
544  %val = load volatile float, float addrspace(1)* %gep
545  ret float %val
546}
547
; Volatile float load from an inreg (SGPR) pointer plus a constant index of
; 2^32 + 1 elements (byte offset 2^34 + 4: low dword 4, high dword 4).
; Expected: the 64-bit byte offset is built in s[4:5] (s5 copied from s4),
; moved to v[0:1] and used as the addr64 VGPR address; the pointer goes into
; resource base s[0:1].
548define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) {
549; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
550; GFX6:       ; %bb.0:
551; GFX6-NEXT:    s_mov_b32 s4, 4
552; GFX6-NEXT:    s_mov_b32 s5, s4
553; GFX6-NEXT:    v_mov_b32_e32 v0, s4
554; GFX6-NEXT:    s_mov_b32 s0, s2
555; GFX6-NEXT:    s_mov_b32 s1, s3
556; GFX6-NEXT:    s_mov_b32 s2, 0
557; GFX6-NEXT:    s_mov_b32 s3, 0xf000
558; GFX6-NEXT:    v_mov_b32_e32 v1, s5
559; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
560; GFX6-NEXT:    s_waitcnt vmcnt(0)
561; GFX6-NEXT:    ; return to shader part epilog
562;
563; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
564; GFX7:       ; %bb.0:
565; GFX7-NEXT:    s_mov_b32 s4, 4
566; GFX7-NEXT:    s_mov_b32 s5, s4
567; GFX7-NEXT:    v_mov_b32_e32 v0, s4
568; GFX7-NEXT:    s_mov_b32 s0, s2
569; GFX7-NEXT:    s_mov_b32 s1, s3
570; GFX7-NEXT:    s_mov_b32 s2, 0
571; GFX7-NEXT:    s_mov_b32 s3, 0xf000
572; GFX7-NEXT:    v_mov_b32_e32 v1, s5
573; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
574; GFX7-NEXT:    s_waitcnt vmcnt(0)
575; GFX7-NEXT:    ; return to shader part epilog
576  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
577  %val = load volatile float, float addrspace(1)* %gep
578  ret float %val
579}
580
; Volatile float load from an inreg (SGPR) pointer plus a constant index of
; 4096 elements (4096 * 4 = 0x4000 bytes).
; Expected: the byte offset is materialized in s4 and passed as the soffset
; operand; no VGPR address is used.
581define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) {
582; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096:
583; GFX6:       ; %bb.0:
584; GFX6-NEXT:    s_mov_b32 s0, s2
585; GFX6-NEXT:    s_mov_b32 s1, s3
586; GFX6-NEXT:    s_mov_b32 s2, -1
587; GFX6-NEXT:    s_mov_b32 s3, 0xf000
588; GFX6-NEXT:    s_movk_i32 s4, 0x4000
589; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], s4
590; GFX6-NEXT:    s_waitcnt vmcnt(0)
591; GFX6-NEXT:    ; return to shader part epilog
592;
593; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4096:
594; GFX7:       ; %bb.0:
595; GFX7-NEXT:    s_mov_b32 s0, s2
596; GFX7-NEXT:    s_mov_b32 s1, s3
597; GFX7-NEXT:    s_mov_b32 s2, -1
598; GFX7-NEXT:    s_mov_b32 s3, 0xf000
599; GFX7-NEXT:    s_movk_i32 s4, 0x4000
600; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s4
601; GFX7-NEXT:    s_waitcnt vmcnt(0)
602; GFX7-NEXT:    ; return to shader part epilog
603  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
604  %val = load volatile float, float addrspace(1)* %gep
605  ret float %val
606}
607
; Volatile float load from a non-inreg (VGPR) pointer plus a constant index of
; 4095 elements (0x3ffc bytes).
; Expected: the pointer in v[0:1] is the addr64 address, the resource base
; s[0:1] is zero, and the byte offset is passed in s4 as the soffset operand.
608define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) {
609; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095:
610; GFX6:       ; %bb.0:
611; GFX6-NEXT:    s_mov_b32 s2, 0
612; GFX6-NEXT:    s_mov_b32 s3, 0xf000
613; GFX6-NEXT:    s_mov_b64 s[0:1], 0
614; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
615; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
616; GFX6-NEXT:    s_waitcnt vmcnt(0)
617; GFX6-NEXT:    ; return to shader part epilog
618;
619; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4095:
620; GFX7:       ; %bb.0:
621; GFX7-NEXT:    s_mov_b32 s2, 0
622; GFX7-NEXT:    s_mov_b32 s3, 0xf000
623; GFX7-NEXT:    s_mov_b64 s[0:1], 0
624; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
625; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
626; GFX7-NEXT:    s_waitcnt vmcnt(0)
627; GFX7-NEXT:    ; return to shader part epilog
628  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
629  %val = load volatile float, float addrspace(1)* %gep
630  ret float %val
631}
632
; Volatile float load from a non-inreg (VGPR) pointer plus a constant index of
; 2^32 elements (byte offset 2^34: low dword 0, high dword 4).
; Expected: the 64-bit byte offset is placed in the resource base s[0:1] and
; the pointer in v[0:1] is used as the addr64 address.
633define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) {
634; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
635; GFX6:       ; %bb.0:
636; GFX6-NEXT:    s_mov_b32 s0, 0
637; GFX6-NEXT:    s_mov_b32 s1, 4
638; GFX6-NEXT:    s_mov_b32 s3, 0xf000
639; GFX6-NEXT:    s_mov_b32 s2, s0
640; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
641; GFX6-NEXT:    s_waitcnt vmcnt(0)
642; GFX6-NEXT:    ; return to shader part epilog
643;
644; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
645; GFX7:       ; %bb.0:
646; GFX7-NEXT:    s_mov_b32 s0, 0
647; GFX7-NEXT:    s_mov_b32 s1, 4
648; GFX7-NEXT:    s_mov_b32 s3, 0xf000
649; GFX7-NEXT:    s_mov_b32 s2, s0
650; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
651; GFX7-NEXT:    s_waitcnt vmcnt(0)
652; GFX7-NEXT:    ; return to shader part epilog
653  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
654  %val = load volatile float, float addrspace(1)* %gep
655  ret float %val
656}
657
; Volatile float load from a non-inreg (VGPR) pointer plus a constant index of
; 2^32 + 1 elements (byte offset 2^34 + 4: low dword 4, high dword 4).
; Expected: the 64-bit byte offset is placed in the resource base s[0:1]
; (s1 copied from s0) and the pointer in v[0:1] is used as the addr64 address.
658define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) {
659; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
660; GFX6:       ; %bb.0:
661; GFX6-NEXT:    s_mov_b32 s0, 4
662; GFX6-NEXT:    s_mov_b32 s1, s0
663; GFX6-NEXT:    s_mov_b32 s2, 0
664; GFX6-NEXT:    s_mov_b32 s3, 0xf000
665; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
666; GFX6-NEXT:    s_waitcnt vmcnt(0)
667; GFX6-NEXT:    ; return to shader part epilog
668;
669; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
670; GFX7:       ; %bb.0:
671; GFX7-NEXT:    s_mov_b32 s0, 4
672; GFX7-NEXT:    s_mov_b32 s1, s0
673; GFX7-NEXT:    s_mov_b32 s2, 0
674; GFX7-NEXT:    s_mov_b32 s3, 0xf000
675; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
676; GFX7-NEXT:    s_waitcnt vmcnt(0)
677; GFX7-NEXT:    ; return to shader part epilog
678  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
679  %val = load volatile float, float addrspace(1)* %gep
680  ret float %val
681}
682
; Volatile float load from a non-inreg (VGPR) pointer plus a constant index of
; 4096 elements (0x4000 bytes).
; Expected: the pointer in v[0:1] is the addr64 address, the resource base
; s[0:1] is zero, and the byte offset is passed in s4 as the soffset operand.
683define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) {
684; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096:
685; GFX6:       ; %bb.0:
686; GFX6-NEXT:    s_mov_b32 s2, 0
687; GFX6-NEXT:    s_mov_b32 s3, 0xf000
688; GFX6-NEXT:    s_mov_b64 s[0:1], 0
689; GFX6-NEXT:    s_movk_i32 s4, 0x4000
690; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
691; GFX6-NEXT:    s_waitcnt vmcnt(0)
692; GFX6-NEXT:    ; return to shader part epilog
693;
694; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4096:
695; GFX7:       ; %bb.0:
696; GFX7-NEXT:    s_mov_b32 s2, 0
697; GFX7-NEXT:    s_mov_b32 s3, 0xf000
698; GFX7-NEXT:    s_mov_b64 s[0:1], 0
699; GFX7-NEXT:    s_movk_i32 s4, 0x4000
700; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
701; GFX7-NEXT:    s_waitcnt vmcnt(0)
702; GFX7-NEXT:    ; return to shader part epilog
703  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
704  %val = load volatile float, float addrspace(1)* %gep
705  ret float %val
706}
707
; Volatile float load from an inreg (SGPR) pointer indexed by an inreg (SGPR)
; i32.
; Expected: the index is sign-extended to 64 bits (s_bfe_i64) and scaled by 4
; (s_lshl_b64 by 2) on the scalar unit, then copied to v[0:1] and used as the
; addr64 address; the pointer goes into resource base s[0:1].
708define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) {
709; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
710; GFX6:       ; %bb.0:
711; GFX6-NEXT:    s_mov_b32 s0, s2
712; GFX6-NEXT:    s_mov_b32 s1, s3
713; GFX6-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
714; GFX6-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
715; GFX6-NEXT:    v_mov_b32_e32 v0, s4
716; GFX6-NEXT:    s_mov_b32 s2, 0
717; GFX6-NEXT:    s_mov_b32 s3, 0xf000
718; GFX6-NEXT:    v_mov_b32_e32 v1, s5
719; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
720; GFX6-NEXT:    s_waitcnt vmcnt(0)
721; GFX6-NEXT:    ; return to shader part epilog
722;
723; GFX7-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
724; GFX7:       ; %bb.0:
725; GFX7-NEXT:    s_mov_b32 s0, s2
726; GFX7-NEXT:    s_mov_b32 s1, s3
727; GFX7-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
728; GFX7-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
729; GFX7-NEXT:    v_mov_b32_e32 v0, s4
730; GFX7-NEXT:    s_mov_b32 s2, 0
731; GFX7-NEXT:    s_mov_b32 s3, 0xf000
732; GFX7-NEXT:    v_mov_b32_e32 v1, s5
733; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
734; GFX7-NEXT:    s_waitcnt vmcnt(0)
735; GFX7-NEXT:    ; return to shader part epilog
736  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
737  %val = load volatile float, float addrspace(1)* %gep
738  ret float %val
739}
740
; Volatile float load from a non-inreg (VGPR) pointer indexed by an inreg
; (SGPR) i32.
; Expected: the index is sign-extended (s_bfe_i64) and scaled by 4
; (s_lshl_b64 by 2) directly into the resource base s[0:1]; the pointer in
; v[0:1] is used as the addr64 address.
741define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
742; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
743; GFX6:       ; %bb.0:
744; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
745; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
746; GFX6-NEXT:    s_mov_b32 s2, 0
747; GFX6-NEXT:    s_mov_b32 s3, 0xf000
748; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
749; GFX6-NEXT:    s_waitcnt vmcnt(0)
750; GFX6-NEXT:    ; return to shader part epilog
751;
752; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
753; GFX7:       ; %bb.0:
754; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
755; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
756; GFX7-NEXT:    s_mov_b32 s2, 0
757; GFX7-NEXT:    s_mov_b32 s3, 0xf000
758; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
759; GFX7-NEXT:    s_waitcnt vmcnt(0)
760; GFX7-NEXT:    ; return to shader part epilog
761  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
762  %val = load volatile float, float addrspace(1)* %gep
763  ret float %val
764}
765
; Volatile float load from a VGPR pointer indexed first by an inreg (SGPR) i32
; and then by a constant 256 elements (1024 bytes).
; Expected: the scaled, sign-extended index goes into the resource base
; s[0:1] and the trailing constant is folded into the instruction's immediate
; field (offset:1024).
766define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) {
767; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
768; GFX6:       ; %bb.0:
769; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
770; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
771; GFX6-NEXT:    s_mov_b32 s2, 0
772; GFX6-NEXT:    s_mov_b32 s3, 0xf000
773; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024
774; GFX6-NEXT:    s_waitcnt vmcnt(0)
775; GFX6-NEXT:    ; return to shader part epilog
776;
777; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
778; GFX7:       ; %bb.0:
779; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
780; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
781; GFX7-NEXT:    s_mov_b32 s2, 0
782; GFX7-NEXT:    s_mov_b32 s3, 0xf000
783; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024
784; GFX7-NEXT:    s_waitcnt vmcnt(0)
785; GFX7-NEXT:    ; return to shader part epilog
786  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
787  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256
788  %val = load volatile float, float addrspace(1)* %gep1
789  ret float %val
790}
791
; Volatile float load from a VGPR pointer indexed first by a constant 256
; elements (0x400 bytes) and then by an inreg (SGPR) i32.
; Expected: with the constant applied first it is not folded into the
; immediate offset field; it is added to the pointer in v[0:1] with a 64-bit
; VALU add (v_add_i32 / v_addc_u32), and the scaled, sign-extended index goes
; into the resource base s[0:1].
; NOTE(review): the constant index here is typed i64, whereas the matching
; store test uses i32 256 -- confirm whether the difference is intentional.
792define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
793; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
794; GFX6:       ; %bb.0:
795; GFX6-NEXT:    s_movk_i32 s4, 0x400
796; GFX6-NEXT:    s_mov_b32 s5, 0
797; GFX6-NEXT:    v_mov_b32_e32 v2, s4
798; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
799; GFX6-NEXT:    v_mov_b32_e32 v3, s5
800; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
801; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
802; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
803; GFX6-NEXT:    s_mov_b32 s3, 0xf000
804; GFX6-NEXT:    s_mov_b32 s2, s5
805; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
806; GFX6-NEXT:    s_waitcnt vmcnt(0)
807; GFX6-NEXT:    ; return to shader part epilog
808;
809; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
810; GFX7:       ; %bb.0:
811; GFX7-NEXT:    s_movk_i32 s4, 0x400
812; GFX7-NEXT:    s_mov_b32 s5, 0
813; GFX7-NEXT:    v_mov_b32_e32 v2, s4
814; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
815; GFX7-NEXT:    v_mov_b32_e32 v3, s5
816; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
817; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
818; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
819; GFX7-NEXT:    s_mov_b32 s3, 0xf000
820; GFX7-NEXT:    s_mov_b32 s2, s5
821; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
822; GFX7-NEXT:    s_waitcnt vmcnt(0)
823; GFX7-NEXT:    ; return to shader part epilog
824  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
825  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset
826  %val = load volatile float, float addrspace(1)* %gep1
827  ret float %val
828}
829
; Volatile float load through an inreg pointer indexed by the non-inreg i32
; %voffset.
; Expected output (same for both targets): %voffset is widened with
; v_ashrrev_i32 (high half = v0 >> 31) and shifted left by 2 in v[0:1], which
; becomes the addr64 address; the pointer is copied from s[2:3] into s[0:1]
; of the resource descriptor.
830define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
831; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
832; GFX6:       ; %bb.0:
833; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
834; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
835; GFX6-NEXT:    s_mov_b32 s0, s2
836; GFX6-NEXT:    s_mov_b32 s1, s3
837; GFX6-NEXT:    s_mov_b32 s2, 0
838; GFX6-NEXT:    s_mov_b32 s3, 0xf000
839; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
840; GFX6-NEXT:    s_waitcnt vmcnt(0)
841; GFX6-NEXT:    ; return to shader part epilog
842;
843; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
844; GFX7:       ; %bb.0:
845; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
846; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
847; GFX7-NEXT:    s_mov_b32 s0, s2
848; GFX7-NEXT:    s_mov_b32 s1, s3
849; GFX7-NEXT:    s_mov_b32 s2, 0
850; GFX7-NEXT:    s_mov_b32 s3, 0xf000
851; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
852; GFX7-NEXT:    s_waitcnt vmcnt(0)
853; GFX7-NEXT:    ; return to shader part epilog
854  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
855  %val = load volatile float, float addrspace(1)* %gep
856  ret float %val
857}
858
; Volatile float load through an inreg pointer indexed by the non-inreg
; %voffset and then by a constant 4095 elements.
; Expected output (same for both targets): the scaled %voffset is the addr64
; address in v[0:1], the pointer is in s[0:1] of the descriptor, and the
; constant 0x3ffc (4095 * 4 bytes) is loaded into s4 and passed as the scalar
; operand after the descriptor; no offset: field is used.
859define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) {
860; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
861; GFX6:       ; %bb.0:
862; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
863; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
864; GFX6-NEXT:    s_mov_b32 s0, s2
865; GFX6-NEXT:    s_mov_b32 s1, s3
866; GFX6-NEXT:    s_mov_b32 s2, 0
867; GFX6-NEXT:    s_mov_b32 s3, 0xf000
868; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
869; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
870; GFX6-NEXT:    s_waitcnt vmcnt(0)
871; GFX6-NEXT:    ; return to shader part epilog
872;
873; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
874; GFX7:       ; %bb.0:
875; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
876; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
877; GFX7-NEXT:    s_mov_b32 s0, s2
878; GFX7-NEXT:    s_mov_b32 s1, s3
879; GFX7-NEXT:    s_mov_b32 s2, 0
880; GFX7-NEXT:    s_mov_b32 s3, 0xf000
881; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
882; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
883; GFX7-NEXT:    s_waitcnt vmcnt(0)
884; GFX7-NEXT:    ; return to shader part epilog
885  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
886  %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095
887  %val = load volatile float, float addrspace(1)* %gep1
888  ret float %val
889}
; Same operands as the _vgpr_offset_offset4095 variant, but the two GEPs are
; swapped: the constant 4095-element index is applied first and the non-inreg
; %voffset second.
; Expected output (same for both targets): the constant 0x3ffc (4095 * 4
; bytes) is added to the pointer with s_add_u32 / s_addc_u32 into s[4:5], the
; descriptor is s[4:7], and the scaled %voffset in v[0:1] is the addr64
; address; the scalar operand after the descriptor is 0.
890define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
891; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
892; GFX6:       ; %bb.0:
893; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
894; GFX6-NEXT:    s_add_u32 s4, s2, 0x3ffc
895; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
896; GFX6-NEXT:    s_mov_b32 s6, 0
897; GFX6-NEXT:    s_addc_u32 s5, s3, 0
898; GFX6-NEXT:    s_mov_b32 s7, 0xf000
899; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
900; GFX6-NEXT:    s_waitcnt vmcnt(0)
901; GFX6-NEXT:    ; return to shader part epilog
902;
903; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
904; GFX7:       ; %bb.0:
905; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
906; GFX7-NEXT:    s_add_u32 s4, s2, 0x3ffc
907; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
908; GFX7-NEXT:    s_mov_b32 s6, 0
909; GFX7-NEXT:    s_addc_u32 s5, s3, 0
910; GFX7-NEXT:    s_mov_b32 s7, 0xf000
911; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
912; GFX7-NEXT:    s_waitcnt vmcnt(0)
913; GFX7-NEXT:    ; return to shader part epilog
914  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
915  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %voffset
916  %val = load volatile float, float addrspace(1)* %gep1
917  ret float %val
918}
919
; seq_cst atomicrmw add of 2 through an inreg pointer plus a constant index
; of 4095 i32 elements; the old value is returned bitcast to float.
; Expected output: buffer_atomic_add ... off, s[0:3], s4 glc with the pointer
; in s[0:1] and s4 = 0x3ffc (4095 * 4 bytes), bracketed by s_waitcnt and
; followed by buffer_wbinvl1. The two targets differ only in the extra
; s_waitcnt expcnt(0) that the tahiti run expects before the epilog.
920define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
921; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
922; GFX6:       ; %bb.0:
923; GFX6-NEXT:    s_mov_b32 s0, s2
924; GFX6-NEXT:    s_mov_b32 s1, s3
925; GFX6-NEXT:    v_mov_b32_e32 v0, 2
926; GFX6-NEXT:    s_mov_b32 s2, -1
927; GFX6-NEXT:    s_mov_b32 s3, 0xf000
928; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
929; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
930; GFX6-NEXT:    buffer_atomic_add v0, off, s[0:3], s4 glc
931; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
932; GFX6-NEXT:    buffer_wbinvl1
933; GFX6-NEXT:    s_waitcnt expcnt(0)
934; GFX6-NEXT:    ; return to shader part epilog
935;
936; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
937; GFX7:       ; %bb.0:
938; GFX7-NEXT:    s_mov_b32 s0, s2
939; GFX7-NEXT:    s_mov_b32 s1, s3
940; GFX7-NEXT:    v_mov_b32_e32 v0, 2
941; GFX7-NEXT:    s_mov_b32 s2, -1
942; GFX7-NEXT:    s_mov_b32 s3, 0xf000
943; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
944; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
945; GFX7-NEXT:    buffer_atomic_add v0, off, s[0:3], s4 glc
946; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
947; GFX7-NEXT:    buffer_wbinvl1
948; GFX7-NEXT:    ; return to shader part epilog
949  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
950  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
951  %cast = bitcast i32 %result to float
952  ret float %cast
953}
954
; seq_cst atomicrmw add of 2 through an inreg pointer plus a constant index
; of 4294967296 (2^32) i32 elements; the old value is returned bitcast to
; float.
; Expected output: the byte offset is built as a 64-bit pair (low dword 0 in
; s4, high dword 4 in s5), copied to v[1:2] and used as the addr64 address,
; while the pointer sits in s[0:1] of the descriptor. The two targets differ
; only in the extra s_waitcnt expcnt(0) that the tahiti run expects.
955define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
956; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
957; GFX6:       ; %bb.0:
958; GFX6-NEXT:    s_mov_b32 s4, 0
959; GFX6-NEXT:    s_mov_b32 s5, 4
960; GFX6-NEXT:    v_mov_b32_e32 v1, s4
961; GFX6-NEXT:    s_mov_b32 s0, s2
962; GFX6-NEXT:    s_mov_b32 s1, s3
963; GFX6-NEXT:    v_mov_b32_e32 v0, 2
964; GFX6-NEXT:    s_mov_b32 s3, 0xf000
965; GFX6-NEXT:    s_mov_b32 s2, s4
966; GFX6-NEXT:    v_mov_b32_e32 v2, s5
967; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
968; GFX6-NEXT:    buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
969; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
970; GFX6-NEXT:    buffer_wbinvl1
971; GFX6-NEXT:    s_waitcnt expcnt(0)
972; GFX6-NEXT:    ; return to shader part epilog
973;
974; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
975; GFX7:       ; %bb.0:
976; GFX7-NEXT:    s_mov_b32 s4, 0
977; GFX7-NEXT:    s_mov_b32 s5, 4
978; GFX7-NEXT:    v_mov_b32_e32 v1, s4
979; GFX7-NEXT:    s_mov_b32 s0, s2
980; GFX7-NEXT:    s_mov_b32 s1, s3
981; GFX7-NEXT:    v_mov_b32_e32 v0, 2
982; GFX7-NEXT:    s_mov_b32 s3, 0xf000
983; GFX7-NEXT:    s_mov_b32 s2, s4
984; GFX7-NEXT:    v_mov_b32_e32 v2, s5
985; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
986; GFX7-NEXT:    buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
987; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
988; GFX7-NEXT:    buffer_wbinvl1
989; GFX7-NEXT:    ; return to shader part epilog
990  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
991  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
992  %cast = bitcast i32 %result to float
993  ret float %cast
994}
995
; seq_cst atomicrmw add of 2 through a non-inreg pointer plus a constant
; index of 4095 i32 elements; the old value is returned bitcast to float.
; Expected output: the pointer in v[0:1] is the addr64 address, s[0:1] of the
; descriptor is zeroed with s_mov_b64, and 0x3ffc (4095 * 4 bytes) is passed
; in s4. The data operand lives in v2 and is copied to v0 for the return. The
; two targets differ only in the extra s_waitcnt expcnt(0) that the tahiti
; run expects.
996define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
997; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
998; GFX6:       ; %bb.0:
999; GFX6-NEXT:    v_mov_b32_e32 v2, 2
1000; GFX6-NEXT:    s_mov_b32 s2, 0
1001; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1002; GFX6-NEXT:    s_mov_b64 s[0:1], 0
1003; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
1004; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1005; GFX6-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
1006; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1007; GFX6-NEXT:    buffer_wbinvl1
1008; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1009; GFX6-NEXT:    s_waitcnt expcnt(0)
1010; GFX6-NEXT:    ; return to shader part epilog
1011;
1012; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
1013; GFX7:       ; %bb.0:
1014; GFX7-NEXT:    v_mov_b32_e32 v2, 2
1015; GFX7-NEXT:    s_mov_b32 s2, 0
1016; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1017; GFX7-NEXT:    s_mov_b64 s[0:1], 0
1018; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
1019; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1020; GFX7-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
1021; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1022; GFX7-NEXT:    buffer_wbinvl1
1023; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1024; GFX7-NEXT:    ; return to shader part epilog
1025  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
1026  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
1027  %cast = bitcast i32 %result to float
1028  ret float %cast
1029}
1030
; seq_cst atomicrmw add of 2 through a non-inreg pointer plus a constant
; index of 4294967296 (2^32) i32 elements; the old value is returned bitcast
; to float.
; Expected output: the 64-bit byte offset (low dword 0, high dword 4) is
; placed directly in s[0:1] of the resource descriptor and the pointer in
; v[0:1] is the addr64 address. The data operand lives in v2 and is copied to
; v0 for the return. The two targets differ only in the extra
; s_waitcnt expcnt(0) that the tahiti run expects.
1031define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
1032; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
1033; GFX6:       ; %bb.0:
1034; GFX6-NEXT:    s_mov_b32 s0, 0
1035; GFX6-NEXT:    s_mov_b32 s1, 4
1036; GFX6-NEXT:    v_mov_b32_e32 v2, 2
1037; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1038; GFX6-NEXT:    s_mov_b32 s2, s0
1039; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1040; GFX6-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1041; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1042; GFX6-NEXT:    buffer_wbinvl1
1043; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1044; GFX6-NEXT:    s_waitcnt expcnt(0)
1045; GFX6-NEXT:    ; return to shader part epilog
1046;
1047; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
1048; GFX7:       ; %bb.0:
1049; GFX7-NEXT:    s_mov_b32 s0, 0
1050; GFX7-NEXT:    s_mov_b32 s1, 4
1051; GFX7-NEXT:    v_mov_b32_e32 v2, 2
1052; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1053; GFX7-NEXT:    s_mov_b32 s2, s0
1054; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1055; GFX7-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1056; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1057; GFX7-NEXT:    buffer_wbinvl1
1058; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1059; GFX7-NEXT:    ; return to shader part epilog
1060  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
1061  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
1062  %cast = bitcast i32 %result to float
1063  ret float %cast
1064}
1065
; seq_cst atomicrmw add of 2 through an inreg pointer indexed by the
; non-inreg i32 %voffset; the old value is returned bitcast to float.
; Expected output: %voffset is widened (v_ashrrev_i32) and shifted left by 2
; in v[0:1], which is the addr64 address; the pointer is copied from s[2:3]
; into s[0:1] of the descriptor. The data operand lives in v2 and is copied
; to v0 for the return. The two targets differ only in the extra
; s_waitcnt expcnt(0) that the tahiti run expects.
1066define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
1067; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
1068; GFX6:       ; %bb.0:
1069; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1070; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1071; GFX6-NEXT:    s_mov_b32 s0, s2
1072; GFX6-NEXT:    s_mov_b32 s1, s3
1073; GFX6-NEXT:    v_mov_b32_e32 v2, 2
1074; GFX6-NEXT:    s_mov_b32 s2, 0
1075; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1076; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1077; GFX6-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1078; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1079; GFX6-NEXT:    buffer_wbinvl1
1080; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1081; GFX6-NEXT:    s_waitcnt expcnt(0)
1082; GFX6-NEXT:    ; return to shader part epilog
1083;
1084; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
1085; GFX7:       ; %bb.0:
1086; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1087; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1088; GFX7-NEXT:    s_mov_b32 s0, s2
1089; GFX7-NEXT:    s_mov_b32 s1, s3
1090; GFX7-NEXT:    v_mov_b32_e32 v2, 2
1091; GFX7-NEXT:    s_mov_b32 s2, 0
1092; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1093; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1094; GFX7-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1095; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1096; GFX7-NEXT:    buffer_wbinvl1
1097; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1098; GFX7-NEXT:    ; return to shader part epilog
1099  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
1100  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
1101  %cast = bitcast i32 %result to float
1102  ret float %cast
1103}
1104
; seq_cst/seq_cst cmpxchg of %old -> %in through an inreg pointer plus a
; constant index of 4095 i32 elements; field 0 of the result struct (the
; loaded value) is returned bitcast to float.
; Expected output: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc with the
; pointer in s[0:1] and s4 = 0x3ffc (4095 * 4 bytes). v0 is first copied to
; v2 to form the v[1:2] data pair, and v1 is copied to v0 for the return. The
; two targets differ only in the extra s_waitcnt expcnt(0) that the tahiti
; run expects.
1105define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) {
1106; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095:
1107; GFX6:       ; %bb.0:
1108; GFX6-NEXT:    s_mov_b32 s0, s2
1109; GFX6-NEXT:    s_mov_b32 s1, s3
1110; GFX6-NEXT:    v_mov_b32_e32 v2, v0
1111; GFX6-NEXT:    s_mov_b32 s2, -1
1112; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1113; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
1114; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1115; GFX6-NEXT:    buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
1116; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1117; GFX6-NEXT:    buffer_wbinvl1
1118; GFX6-NEXT:    v_mov_b32_e32 v0, v1
1119; GFX6-NEXT:    s_waitcnt expcnt(0)
1120; GFX6-NEXT:    ; return to shader part epilog
1121;
1122; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095:
1123; GFX7:       ; %bb.0:
1124; GFX7-NEXT:    s_mov_b32 s0, s2
1125; GFX7-NEXT:    s_mov_b32 s1, s3
1126; GFX7-NEXT:    v_mov_b32_e32 v2, v0
1127; GFX7-NEXT:    s_mov_b32 s2, -1
1128; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1129; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
1130; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1131; GFX7-NEXT:    buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
1132; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1133; GFX7-NEXT:    buffer_wbinvl1
1134; GFX7-NEXT:    v_mov_b32_e32 v0, v1
1135; GFX7-NEXT:    ; return to shader part epilog
1136  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
1137  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1138  %result = extractvalue { i32, i1 } %result.struct, 0
1139  %cast = bitcast i32 %result to float
1140  ret float %cast
1141}
1142
; seq_cst/seq_cst cmpxchg of %old -> %in through an inreg pointer plus a
; constant index of 4294967296 (2^32) i32 elements; field 0 of the result
; struct (the loaded value) is returned bitcast to float.
; Expected output: the 64-bit byte offset (low dword 0 in s4, high dword 4 in
; s5) is copied to v[3:4] and used as the addr64 address, while the pointer
; sits in s[0:1] of the descriptor. The data pair is v[1:2] and v1 is copied
; to v0 for the return. The two targets differ only in the extra
; s_waitcnt expcnt(0) that the tahiti run expects.
1143define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) {
1144; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
1145; GFX6:       ; %bb.0:
1146; GFX6-NEXT:    s_mov_b32 s4, 0
1147; GFX6-NEXT:    s_mov_b32 s5, 4
1148; GFX6-NEXT:    v_mov_b32_e32 v3, s4
1149; GFX6-NEXT:    s_mov_b32 s0, s2
1150; GFX6-NEXT:    s_mov_b32 s1, s3
1151; GFX6-NEXT:    v_mov_b32_e32 v2, v0
1152; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1153; GFX6-NEXT:    s_mov_b32 s2, s4
1154; GFX6-NEXT:    v_mov_b32_e32 v4, s5
1155; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1156; GFX6-NEXT:    buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
1157; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1158; GFX6-NEXT:    buffer_wbinvl1
1159; GFX6-NEXT:    v_mov_b32_e32 v0, v1
1160; GFX6-NEXT:    s_waitcnt expcnt(0)
1161; GFX6-NEXT:    ; return to shader part epilog
1162;
1163; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
1164; GFX7:       ; %bb.0:
1165; GFX7-NEXT:    s_mov_b32 s4, 0
1166; GFX7-NEXT:    s_mov_b32 s5, 4
1167; GFX7-NEXT:    v_mov_b32_e32 v3, s4
1168; GFX7-NEXT:    s_mov_b32 s0, s2
1169; GFX7-NEXT:    s_mov_b32 s1, s3
1170; GFX7-NEXT:    v_mov_b32_e32 v2, v0
1171; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1172; GFX7-NEXT:    s_mov_b32 s2, s4
1173; GFX7-NEXT:    v_mov_b32_e32 v4, s5
1174; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1175; GFX7-NEXT:    buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
1176; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1177; GFX7-NEXT:    buffer_wbinvl1
1178; GFX7-NEXT:    v_mov_b32_e32 v0, v1
1179; GFX7-NEXT:    ; return to shader part epilog
1180  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
1181  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1182  %result = extractvalue { i32, i1 } %result.struct, 0
1183  %cast = bitcast i32 %result to float
1184  ret float %cast
1185}
1186
; seq_cst/seq_cst cmpxchg of %old -> %in through a non-inreg pointer plus a
; constant index of 4095 i32 elements; field 0 of the result struct (the
; loaded value) is returned bitcast to float.
; Expected output: the pointer in v[0:1] is the addr64 address, s[0:1] of the
; descriptor is zeroed with s_mov_b64, and 0x3ffc (4095 * 4 bytes) is passed
; in s4. v2 is first copied to v4 to form the v[3:4] data pair, and v3 is
; copied to v0 for the return. The two targets differ only in the extra
; s_waitcnt expcnt(0) that the tahiti run expects.
1187define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) {
1188; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095:
1189; GFX6:       ; %bb.0:
1190; GFX6-NEXT:    v_mov_b32_e32 v4, v2
1191; GFX6-NEXT:    s_mov_b32 s2, 0
1192; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1193; GFX6-NEXT:    s_mov_b64 s[0:1], 0
1194; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
1195; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1196; GFX6-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
1197; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1198; GFX6-NEXT:    buffer_wbinvl1
1199; GFX6-NEXT:    v_mov_b32_e32 v0, v3
1200; GFX6-NEXT:    s_waitcnt expcnt(0)
1201; GFX6-NEXT:    ; return to shader part epilog
1202;
1203; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095:
1204; GFX7:       ; %bb.0:
1205; GFX7-NEXT:    v_mov_b32_e32 v4, v2
1206; GFX7-NEXT:    s_mov_b32 s2, 0
1207; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1208; GFX7-NEXT:    s_mov_b64 s[0:1], 0
1209; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
1210; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1211; GFX7-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
1212; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1213; GFX7-NEXT:    buffer_wbinvl1
1214; GFX7-NEXT:    v_mov_b32_e32 v0, v3
1215; GFX7-NEXT:    ; return to shader part epilog
1216  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
1217  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1218  %result = extractvalue { i32, i1 } %result.struct, 0
1219  %cast = bitcast i32 %result to float
1220  ret float %cast
1221}
1222
; seq_cst/seq_cst cmpxchg of %old -> %in through a non-inreg pointer plus a
; constant index of 4294967296 (2^32) i32 elements; field 0 of the result
; struct (the loaded value) is returned bitcast to float.
; Expected output: the 64-bit byte offset (low dword 0, high dword 4) is
; placed directly in s[0:1] of the resource descriptor and the pointer in
; v[0:1] is the addr64 address. v2 is first copied to v4 to form the v[3:4]
; data pair, and v3 is copied to v0 for the return. The two targets differ
; only in the extra s_waitcnt expcnt(0) that the tahiti run expects.
1223define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) {
1224; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
1225; GFX6:       ; %bb.0:
1226; GFX6-NEXT:    s_mov_b32 s0, 0
1227; GFX6-NEXT:    v_mov_b32_e32 v4, v2
1228; GFX6-NEXT:    s_mov_b32 s1, 4
1229; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1230; GFX6-NEXT:    s_mov_b32 s2, s0
1231; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1232; GFX6-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
1233; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1234; GFX6-NEXT:    buffer_wbinvl1
1235; GFX6-NEXT:    v_mov_b32_e32 v0, v3
1236; GFX6-NEXT:    s_waitcnt expcnt(0)
1237; GFX6-NEXT:    ; return to shader part epilog
1238;
1239; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
1240; GFX7:       ; %bb.0:
1241; GFX7-NEXT:    s_mov_b32 s0, 0
1242; GFX7-NEXT:    v_mov_b32_e32 v4, v2
1243; GFX7-NEXT:    s_mov_b32 s1, 4
1244; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1245; GFX7-NEXT:    s_mov_b32 s2, s0
1246; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1247; GFX7-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
1248; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1249; GFX7-NEXT:    buffer_wbinvl1
1250; GFX7-NEXT:    v_mov_b32_e32 v0, v3
1251; GFX7-NEXT:    ; return to shader part epilog
1252  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
1253  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1254  %result = extractvalue { i32, i1 } %result.struct, 0
1255  %cast = bitcast i32 %result to float
1256  ret float %cast
1257}
1258
; seq_cst/seq_cst cmpxchg of %old -> %in through an inreg pointer indexed by
; the non-inreg i32 %voffset; field 0 of the result struct (the loaded value)
; is returned bitcast to float.
; Expected output: v1 is first copied to v3 so that v[2:3] forms the data
; pair, then %voffset in v0 is widened (v_ashrrev_i32) and shifted left by 2
; in v[0:1] to become the addr64 address; the pointer is copied from s[2:3]
; into s[0:1] of the descriptor and v2 is copied to v0 for the return. The
; two targets differ only in the extra s_waitcnt expcnt(0) that the tahiti
; run expects.
1259define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) {
1260; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset:
1261; GFX6:       ; %bb.0:
1262; GFX6-NEXT:    v_mov_b32_e32 v3, v1
1263; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1264; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1265; GFX6-NEXT:    s_mov_b32 s0, s2
1266; GFX6-NEXT:    s_mov_b32 s1, s3
1267; GFX6-NEXT:    s_mov_b32 s2, 0
1268; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1269; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1270; GFX6-NEXT:    buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
1271; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1272; GFX6-NEXT:    buffer_wbinvl1
1273; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1274; GFX6-NEXT:    s_waitcnt expcnt(0)
1275; GFX6-NEXT:    ; return to shader part epilog
1276;
1277; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset:
1278; GFX7:       ; %bb.0:
1279; GFX7-NEXT:    v_mov_b32_e32 v3, v1
1280; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1281; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1282; GFX7-NEXT:    s_mov_b32 s0, s2
1283; GFX7-NEXT:    s_mov_b32 s1, s3
1284; GFX7-NEXT:    s_mov_b32 s2, 0
1285; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1286; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1287; GFX7-NEXT:    buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
1288; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1289; GFX7-NEXT:    buffer_wbinvl1
1290; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1291; GFX7-NEXT:    ; return to shader part epilog
1292  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
1293  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1294  %result = extractvalue { i32, i1 } %result.struct, 0
1295  %cast = bitcast i32 %result to float
1296  ret float %cast
1297}
1298