1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4
5; Test end-to-end matching of addressing modes when MUBUF is used for
6; global memory.
7
; Store i32 0 through an inreg (SGPR) global pointer with no offset.
; Expected output: the pointer arriving in s[2:3] is copied into s[0:1] of the
; s[0:3] operand and the store uses "off" with a zero soffset.
8define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) {
9; GFX6-LABEL: mubuf_store_sgpr_ptr:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_mov_b32 s0, s2
12; GFX6-NEXT:    s_mov_b32 s1, s3
13; GFX6-NEXT:    v_mov_b32_e32 v0, 0
14; GFX6-NEXT:    s_mov_b32 s2, -1
15; GFX6-NEXT:    s_mov_b32 s3, 0xf000
16; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
17; GFX6-NEXT:    s_endpgm
18;
19; GFX7-LABEL: mubuf_store_sgpr_ptr:
20; GFX7:       ; %bb.0:
21; GFX7-NEXT:    s_mov_b32 s0, s2
22; GFX7-NEXT:    s_mov_b32 s1, s3
23; GFX7-NEXT:    v_mov_b32_e32 v0, 0
24; GFX7-NEXT:    s_mov_b32 s2, -1
25; GFX7-NEXT:    s_mov_b32 s3, 0xf000
26; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
27; GFX7-NEXT:    s_endpgm
28  store i32 0, i32 addrspace(1)* %ptr
29  ret void
30}
31
; Store i32 0 at element index 4095 from an inreg (SGPR) global pointer.
; The index counts i32 elements, so the byte offset is 4095 * 4 = 0x3ffc (not
; 4095); the expected output materializes it with s_movk_i32 and passes it as
; the soffset operand.
32define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
33; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095:
34; GFX6:       ; %bb.0:
35; GFX6-NEXT:    s_mov_b32 s0, s2
36; GFX6-NEXT:    s_mov_b32 s1, s3
37; GFX6-NEXT:    v_mov_b32_e32 v0, 0
38; GFX6-NEXT:    s_mov_b32 s2, -1
39; GFX6-NEXT:    s_mov_b32 s3, 0xf000
40; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
41; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], s4
42; GFX6-NEXT:    s_endpgm
43;
44; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095:
45; GFX7:       ; %bb.0:
46; GFX7-NEXT:    s_mov_b32 s0, s2
47; GFX7-NEXT:    s_mov_b32 s1, s3
48; GFX7-NEXT:    v_mov_b32_e32 v0, 0
49; GFX7-NEXT:    s_mov_b32 s2, -1
50; GFX7-NEXT:    s_mov_b32 s3, 0xf000
51; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
52; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s4
53; GFX7-NEXT:    s_endpgm
54  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
55  store i32 0, i32 addrspace(1)* %gep
56  ret void
57}
58
; Store i32 0 at element index 4294967296 (2^32) from an inreg (SGPR) pointer.
; The byte offset is 0x400000000, which does not fit in 32 bits: the expected
; output builds it in s[4:5] (low half 0, high half 4), copies it to v[0:1],
; and uses the addr64 form of the store with the pointer in s[0:1].
59define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
60; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
61; GFX6:       ; %bb.0:
62; GFX6-NEXT:    s_mov_b32 s4, 0
63; GFX6-NEXT:    s_mov_b32 s5, 4
64; GFX6-NEXT:    v_mov_b32_e32 v0, s4
65; GFX6-NEXT:    s_mov_b32 s0, s2
66; GFX6-NEXT:    s_mov_b32 s1, s3
67; GFX6-NEXT:    v_mov_b32_e32 v2, 0
68; GFX6-NEXT:    s_mov_b32 s3, 0xf000
69; GFX6-NEXT:    s_mov_b32 s2, s4
70; GFX6-NEXT:    v_mov_b32_e32 v1, s5
71; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
72; GFX6-NEXT:    s_endpgm
73;
74; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
75; GFX7:       ; %bb.0:
76; GFX7-NEXT:    s_mov_b32 s4, 0
77; GFX7-NEXT:    s_mov_b32 s5, 4
78; GFX7-NEXT:    v_mov_b32_e32 v0, s4
79; GFX7-NEXT:    s_mov_b32 s0, s2
80; GFX7-NEXT:    s_mov_b32 s1, s3
81; GFX7-NEXT:    v_mov_b32_e32 v2, 0
82; GFX7-NEXT:    s_mov_b32 s3, 0xf000
83; GFX7-NEXT:    s_mov_b32 s2, s4
84; GFX7-NEXT:    v_mov_b32_e32 v1, s5
85; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
86; GFX7-NEXT:    s_endpgm
87  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
88  store i32 0, i32 addrspace(1)* %gep
89  ret void
90}
91
; Store i32 0 at element index 4294967297 (2^32 + 1) from an inreg (SGPR)
; pointer. The byte offset is 0x400000004, so both 32-bit halves equal 4: the
; expected output sets s4 to 4, copies it to s5, and moves the pair to v[0:1]
; for an addr64 store.
92define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) {
93; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
94; GFX6:       ; %bb.0:
95; GFX6-NEXT:    s_mov_b32 s4, 4
96; GFX6-NEXT:    s_mov_b32 s5, s4
97; GFX6-NEXT:    v_mov_b32_e32 v0, s4
98; GFX6-NEXT:    s_mov_b32 s0, s2
99; GFX6-NEXT:    s_mov_b32 s1, s3
100; GFX6-NEXT:    s_mov_b32 s2, 0
101; GFX6-NEXT:    v_mov_b32_e32 v2, 0
102; GFX6-NEXT:    s_mov_b32 s3, 0xf000
103; GFX6-NEXT:    v_mov_b32_e32 v1, s5
104; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
105; GFX6-NEXT:    s_endpgm
106;
107; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
108; GFX7:       ; %bb.0:
109; GFX7-NEXT:    s_mov_b32 s4, 4
110; GFX7-NEXT:    s_mov_b32 s5, s4
111; GFX7-NEXT:    v_mov_b32_e32 v0, s4
112; GFX7-NEXT:    s_mov_b32 s0, s2
113; GFX7-NEXT:    s_mov_b32 s1, s3
114; GFX7-NEXT:    s_mov_b32 s2, 0
115; GFX7-NEXT:    v_mov_b32_e32 v2, 0
116; GFX7-NEXT:    s_mov_b32 s3, 0xf000
117; GFX7-NEXT:    v_mov_b32_e32 v1, s5
118; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
119; GFX7-NEXT:    s_endpgm
120  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
121  store i32 0, i32 addrspace(1)* %gep
122  ret void
123}
124
; Store i32 0 at element index 4096 from an inreg (SGPR) global pointer.
; The byte offset is 4096 * 4 = 0x4000; the expected output loads it into s4
; with s_movk_i32 and passes it as the soffset operand.
125define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) {
126; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096:
127; GFX6:       ; %bb.0:
128; GFX6-NEXT:    s_mov_b32 s0, s2
129; GFX6-NEXT:    s_mov_b32 s1, s3
130; GFX6-NEXT:    v_mov_b32_e32 v0, 0
131; GFX6-NEXT:    s_mov_b32 s2, -1
132; GFX6-NEXT:    s_mov_b32 s3, 0xf000
133; GFX6-NEXT:    s_movk_i32 s4, 0x4000
134; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], s4
135; GFX6-NEXT:    s_endpgm
136;
137; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4096:
138; GFX7:       ; %bb.0:
139; GFX7-NEXT:    s_mov_b32 s0, s2
140; GFX7-NEXT:    s_mov_b32 s1, s3
141; GFX7-NEXT:    v_mov_b32_e32 v0, 0
142; GFX7-NEXT:    s_mov_b32 s2, -1
143; GFX7-NEXT:    s_mov_b32 s3, 0xf000
144; GFX7-NEXT:    s_movk_i32 s4, 0x4000
145; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s4
146; GFX7-NEXT:    s_endpgm
147  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
148  store i32 0, i32 addrspace(1)* %gep
149  ret void
150}
151
; Store i32 0 at element index 4095 from a non-inreg (VGPR) global pointer.
; Expected output: the pointer stays in v[0:1] (addr64 form), s[0:1] is zeroed,
; and the byte offset 4095 * 4 = 0x3ffc is passed in s4 as soffset.
152define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
153; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095:
154; GFX6:       ; %bb.0:
155; GFX6-NEXT:    s_mov_b32 s2, 0
156; GFX6-NEXT:    v_mov_b32_e32 v2, 0
157; GFX6-NEXT:    s_mov_b32 s3, 0xf000
158; GFX6-NEXT:    s_mov_b64 s[0:1], 0
159; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
160; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
161; GFX6-NEXT:    s_endpgm
162;
163; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4095:
164; GFX7:       ; %bb.0:
165; GFX7-NEXT:    s_mov_b32 s2, 0
166; GFX7-NEXT:    v_mov_b32_e32 v2, 0
167; GFX7-NEXT:    s_mov_b32 s3, 0xf000
168; GFX7-NEXT:    s_mov_b64 s[0:1], 0
169; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
170; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
171; GFX7-NEXT:    s_endpgm
172  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
173  store i32 0, i32 addrspace(1)* %gep
174  ret void
175}
176
; Store i32 0 at element index 4294967296 (2^32) from a VGPR global pointer.
; Expected output: the 64-bit byte offset 0x400000000 is placed in s[0:1]
; (low half 0, high half 4) while the pointer stays in v[0:1] for the addr64
; store; s2 reuses the zero already in s0.
177define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
178; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
179; GFX6:       ; %bb.0:
180; GFX6-NEXT:    s_mov_b32 s0, 0
181; GFX6-NEXT:    s_mov_b32 s1, 4
182; GFX6-NEXT:    v_mov_b32_e32 v2, 0
183; GFX6-NEXT:    s_mov_b32 s3, 0xf000
184; GFX6-NEXT:    s_mov_b32 s2, s0
185; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
186; GFX6-NEXT:    s_endpgm
187;
188; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
189; GFX7:       ; %bb.0:
190; GFX7-NEXT:    s_mov_b32 s0, 0
191; GFX7-NEXT:    s_mov_b32 s1, 4
192; GFX7-NEXT:    v_mov_b32_e32 v2, 0
193; GFX7-NEXT:    s_mov_b32 s3, 0xf000
194; GFX7-NEXT:    s_mov_b32 s2, s0
195; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
196; GFX7-NEXT:    s_endpgm
197  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
198  store i32 0, i32 addrspace(1)* %gep
199  ret void
200}
201
; Store i32 0 at element index 4294967297 (2^32 + 1) from a VGPR global
; pointer. Expected output: the byte offset 0x400000004 is placed in s[0:1]
; (s0 = 4, s1 copied from s0) and the pointer stays in v[0:1] for the addr64
; store.
202define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) {
203; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
204; GFX6:       ; %bb.0:
205; GFX6-NEXT:    s_mov_b32 s0, 4
206; GFX6-NEXT:    s_mov_b32 s1, s0
207; GFX6-NEXT:    s_mov_b32 s2, 0
208; GFX6-NEXT:    v_mov_b32_e32 v2, 0
209; GFX6-NEXT:    s_mov_b32 s3, 0xf000
210; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
211; GFX6-NEXT:    s_endpgm
212;
213; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
214; GFX7:       ; %bb.0:
215; GFX7-NEXT:    s_mov_b32 s0, 4
216; GFX7-NEXT:    s_mov_b32 s1, s0
217; GFX7-NEXT:    s_mov_b32 s2, 0
218; GFX7-NEXT:    v_mov_b32_e32 v2, 0
219; GFX7-NEXT:    s_mov_b32 s3, 0xf000
220; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
221; GFX7-NEXT:    s_endpgm
222  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
223  store i32 0, i32 addrspace(1)* %gep
224  ret void
225}
226
; Store i32 0 at element index 4096 from a VGPR global pointer.
; Expected output: the pointer stays in v[0:1] (addr64 form), s[0:1] is zeroed,
; and the byte offset 4096 * 4 = 0x4000 is passed in s4 as soffset.
227define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) {
228; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096:
229; GFX6:       ; %bb.0:
230; GFX6-NEXT:    s_mov_b32 s2, 0
231; GFX6-NEXT:    v_mov_b32_e32 v2, 0
232; GFX6-NEXT:    s_mov_b32 s3, 0xf000
233; GFX6-NEXT:    s_mov_b64 s[0:1], 0
234; GFX6-NEXT:    s_movk_i32 s4, 0x4000
235; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
236; GFX6-NEXT:    s_endpgm
237;
238; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4096:
239; GFX7:       ; %bb.0:
240; GFX7-NEXT:    s_mov_b32 s2, 0
241; GFX7-NEXT:    v_mov_b32_e32 v2, 0
242; GFX7-NEXT:    s_mov_b32 s3, 0xf000
243; GFX7-NEXT:    s_mov_b64 s[0:1], 0
244; GFX7-NEXT:    s_movk_i32 s4, 0x4000
245; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
246; GFX7-NEXT:    s_endpgm
247  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
248  store i32 0, i32 addrspace(1)* %gep
249  ret void
250}
251
; Store i32 0 at a variable i32 element index, with both the pointer and the
; index passed inreg (SGPR). Expected output: the index is widened to 64 bits
; with s_bfe_i64, scaled by 4 with s_lshl_b64 ..., 2, then copied to v[0:1] for
; an addr64 store whose s[0:1] holds the pointer.
252define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) {
253; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
254; GFX6:       ; %bb.0:
255; GFX6-NEXT:    s_mov_b32 s0, s2
256; GFX6-NEXT:    s_mov_b32 s1, s3
257; GFX6-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
258; GFX6-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
259; GFX6-NEXT:    v_mov_b32_e32 v0, s4
260; GFX6-NEXT:    s_mov_b32 s2, 0
261; GFX6-NEXT:    v_mov_b32_e32 v2, 0
262; GFX6-NEXT:    s_mov_b32 s3, 0xf000
263; GFX6-NEXT:    v_mov_b32_e32 v1, s5
264; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
265; GFX6-NEXT:    s_endpgm
266;
267; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
268; GFX7:       ; %bb.0:
269; GFX7-NEXT:    s_mov_b32 s0, s2
270; GFX7-NEXT:    s_mov_b32 s1, s3
271; GFX7-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
272; GFX7-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
273; GFX7-NEXT:    v_mov_b32_e32 v0, s4
274; GFX7-NEXT:    s_mov_b32 s2, 0
275; GFX7-NEXT:    v_mov_b32_e32 v2, 0
276; GFX7-NEXT:    s_mov_b32 s3, 0xf000
277; GFX7-NEXT:    v_mov_b32_e32 v1, s5
278; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
279; GFX7-NEXT:    s_endpgm
280  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
281  store i32 0, i32 addrspace(1)* %gep
282  ret void
283}
284
; Store i32 0 at a variable inreg (SGPR) i32 element index from a VGPR global
; pointer. Expected output: the index is widened with s_bfe_i64 and scaled by 4
; with s_lshl_b64 directly into s[0:1], while the pointer stays in v[0:1] for
; the addr64 store.
285define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
286; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
287; GFX6:       ; %bb.0:
288; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
289; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
290; GFX6-NEXT:    s_mov_b32 s2, 0
291; GFX6-NEXT:    v_mov_b32_e32 v2, 0
292; GFX6-NEXT:    s_mov_b32 s3, 0xf000
293; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
294; GFX6-NEXT:    s_endpgm
295;
296; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
297; GFX7:       ; %bb.0:
298; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
299; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
300; GFX7-NEXT:    s_mov_b32 s2, 0
301; GFX7-NEXT:    v_mov_b32_e32 v2, 0
302; GFX7-NEXT:    s_mov_b32 s3, 0xf000
303; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
304; GFX7-NEXT:    s_endpgm
305  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
306  store i32 0, i32 addrspace(1)* %gep
307  ret void
308}
309
; Store i32 0 from a VGPR global pointer indexed first by a variable inreg
; (SGPR) i32 index and then by a constant 256 elements. Expected output: the
; scaled variable index goes into s[0:1] and the constant part, 256 * 4 = 1024
; bytes, appears in the store's immediate offset field.
310define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
311; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
312; GFX6:       ; %bb.0:
313; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
314; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
315; GFX6-NEXT:    s_mov_b32 s2, 0
316; GFX6-NEXT:    v_mov_b32_e32 v2, 0
317; GFX6-NEXT:    s_mov_b32 s3, 0xf000
318; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
319; GFX6-NEXT:    s_endpgm
320;
321; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
322; GFX7:       ; %bb.0:
323; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
324; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
325; GFX7-NEXT:    s_mov_b32 s2, 0
326; GFX7-NEXT:    v_mov_b32_e32 v2, 0
327; GFX7-NEXT:    s_mov_b32 s3, 0xf000
328; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
329; GFX7-NEXT:    s_endpgm
330  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
331  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256
332  store i32 0, i32 addrspace(1)* %gep1
333  ret void
334}
335
; Store i32 0 from a VGPR global pointer indexed first by a constant 256
; elements and then by a variable inreg (SGPR) i32 index. Expected output: the
; scaled variable index goes into s[0:1] and the constant part, 256 * 4 = 1024
; bytes, still appears in the store's immediate offset field.
336define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
337; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
338; GFX6:       ; %bb.0:
339; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
340; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
341; GFX6-NEXT:    s_mov_b32 s2, 0
342; GFX6-NEXT:    v_mov_b32_e32 v2, 0
343; GFX6-NEXT:    s_mov_b32 s3, 0xf000
344; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
345; GFX6-NEXT:    s_endpgm
346;
347; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
348; GFX7:       ; %bb.0:
349; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
350; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
351; GFX7-NEXT:    s_mov_b32 s2, 0
352; GFX7-NEXT:    v_mov_b32_e32 v2, 0
353; GFX7-NEXT:    s_mov_b32 s3, 0xf000
354; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
355; GFX7-NEXT:    s_endpgm
356  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
357  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
358  store i32 0, i32 addrspace(1)* %gep1
359  ret void
360}
361
; Store i32 0 from an inreg (SGPR) global pointer at a variable VGPR i32
; element index. Expected output: v_ashrrev_i32 by 31 produces the high half of
; the widened index, v_lshl_b64 by 2 scales it to bytes in v[0:1], and the
; pointer is copied into s[0:1] for the addr64 store.
362define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
363; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
364; GFX6:       ; %bb.0:
365; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
366; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
367; GFX6-NEXT:    s_mov_b32 s0, s2
368; GFX6-NEXT:    s_mov_b32 s1, s3
369; GFX6-NEXT:    s_mov_b32 s2, 0
370; GFX6-NEXT:    v_mov_b32_e32 v2, 0
371; GFX6-NEXT:    s_mov_b32 s3, 0xf000
372; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
373; GFX6-NEXT:    s_endpgm
374;
375; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
376; GFX7:       ; %bb.0:
377; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
378; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
379; GFX7-NEXT:    s_mov_b32 s0, s2
380; GFX7-NEXT:    s_mov_b32 s1, s3
381; GFX7-NEXT:    s_mov_b32 s2, 0
382; GFX7-NEXT:    v_mov_b32_e32 v2, 0
383; GFX7-NEXT:    s_mov_b32 s3, 0xf000
384; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
385; GFX7-NEXT:    s_endpgm
386  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
387  store i32 0, i32 addrspace(1)* %gep
388  ret void
389}
390
; Store i32 0 from an inreg (SGPR) global pointer indexed first by a variable
; VGPR i32 index and then by a constant 4095 elements. Expected output: the
; scaled variable index is in v[0:1] (addr64) and the constant part,
; 4095 * 4 = 0x3ffc bytes, is passed in s4 as soffset.
391define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
392; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
393; GFX6:       ; %bb.0:
394; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
395; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
396; GFX6-NEXT:    s_mov_b32 s0, s2
397; GFX6-NEXT:    s_mov_b32 s1, s3
398; GFX6-NEXT:    s_mov_b32 s2, 0
399; GFX6-NEXT:    v_mov_b32_e32 v2, 0
400; GFX6-NEXT:    s_mov_b32 s3, 0xf000
401; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
402; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
403; GFX6-NEXT:    s_endpgm
404;
405; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
406; GFX7:       ; %bb.0:
407; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
408; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
409; GFX7-NEXT:    s_mov_b32 s0, s2
410; GFX7-NEXT:    s_mov_b32 s1, s3
411; GFX7-NEXT:    s_mov_b32 s2, 0
412; GFX7-NEXT:    v_mov_b32_e32 v2, 0
413; GFX7-NEXT:    s_mov_b32 s3, 0xf000
414; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
415; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
416; GFX7-NEXT:    s_endpgm
417  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
418  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095
419  store i32 0, i32 addrspace(1)* %gep1
420  ret void
421}
; Store i32 0 from an inreg (SGPR) global pointer indexed first by a constant
; 4095 elements and then by a variable VGPR i32 index. Expected output: the
; scaled variable index is in v[0:1] (addr64) and the constant part,
; 4095 * 4 = 0x3ffc bytes, is passed in s4 as soffset.
422define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
423; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
424; GFX6:       ; %bb.0:
425; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
426; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
427; GFX6-NEXT:    s_mov_b32 s0, s2
428; GFX6-NEXT:    s_mov_b32 s1, s3
429; GFX6-NEXT:    s_mov_b32 s2, 0
430; GFX6-NEXT:    v_mov_b32_e32 v2, 0
431; GFX6-NEXT:    s_mov_b32 s3, 0xf000
432; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
433; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
434; GFX6-NEXT:    s_endpgm
435;
436; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
437; GFX7:       ; %bb.0:
438; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
439; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
440; GFX7-NEXT:    s_mov_b32 s0, s2
441; GFX7-NEXT:    s_mov_b32 s1, s3
442; GFX7-NEXT:    s_mov_b32 s2, 0
443; GFX7-NEXT:    v_mov_b32_e32 v2, 0
444; GFX7-NEXT:    s_mov_b32 s3, 0xf000
445; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
446; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
447; GFX7-NEXT:    s_endpgm
448  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
449  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
450  store i32 0, i32 addrspace(1)* %gep1
451  ret void
452}
453
; Volatile float load through an inreg (SGPR) global pointer with no offset;
; the loaded value is returned. Expected output: the pointer in s[2:3] is
; copied into s[0:1], the load uses "off" with a zero soffset and carries glc,
; and s_waitcnt vmcnt(0) follows before the return.
454define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) {
455; GFX6-LABEL: mubuf_load_sgpr_ptr:
456; GFX6:       ; %bb.0:
457; GFX6-NEXT:    s_mov_b32 s0, s2
458; GFX6-NEXT:    s_mov_b32 s1, s3
459; GFX6-NEXT:    s_mov_b32 s2, -1
460; GFX6-NEXT:    s_mov_b32 s3, 0xf000
461; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
462; GFX6-NEXT:    s_waitcnt vmcnt(0)
463; GFX6-NEXT:    ; return to shader part epilog
464;
465; GFX7-LABEL: mubuf_load_sgpr_ptr:
466; GFX7:       ; %bb.0:
467; GFX7-NEXT:    s_mov_b32 s0, s2
468; GFX7-NEXT:    s_mov_b32 s1, s3
469; GFX7-NEXT:    s_mov_b32 s2, -1
470; GFX7-NEXT:    s_mov_b32 s3, 0xf000
471; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
472; GFX7-NEXT:    s_waitcnt vmcnt(0)
473; GFX7-NEXT:    ; return to shader part epilog
474  %val = load volatile float, float addrspace(1)* %ptr
475  ret float %val
476}
477
; Volatile float load at element index 4095 from an inreg (SGPR) global
; pointer. The index counts 4-byte floats, so the byte offset is
; 4095 * 4 = 0x3ffc (not 4095); the expected output passes it in s4 as soffset.
478define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) {
479; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095:
480; GFX6:       ; %bb.0:
481; GFX6-NEXT:    s_mov_b32 s0, s2
482; GFX6-NEXT:    s_mov_b32 s1, s3
483; GFX6-NEXT:    s_mov_b32 s2, -1
484; GFX6-NEXT:    s_mov_b32 s3, 0xf000
485; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
486; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
487; GFX6-NEXT:    s_waitcnt vmcnt(0)
488; GFX6-NEXT:    ; return to shader part epilog
489;
490; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095:
491; GFX7:       ; %bb.0:
492; GFX7-NEXT:    s_mov_b32 s0, s2
493; GFX7-NEXT:    s_mov_b32 s1, s3
494; GFX7-NEXT:    s_mov_b32 s2, -1
495; GFX7-NEXT:    s_mov_b32 s3, 0xf000
496; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
497; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
498; GFX7-NEXT:    s_waitcnt vmcnt(0)
499; GFX7-NEXT:    ; return to shader part epilog
500  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
501  %val = load volatile float, float addrspace(1)* %gep
502  ret float %val
503}
504
; Volatile float load at element index 4294967296 (2^32) from an inreg (SGPR)
; global pointer. The byte offset is 0x400000000: the expected output builds it
; in s[4:5] (low half 0, high half 4), copies it to v[0:1], and uses the addr64
; form of the load with the pointer in s[0:1].
505define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) {
506; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
507; GFX6:       ; %bb.0:
508; GFX6-NEXT:    s_mov_b32 s4, 0
509; GFX6-NEXT:    s_mov_b32 s5, 4
510; GFX6-NEXT:    v_mov_b32_e32 v0, s4
511; GFX6-NEXT:    s_mov_b32 s0, s2
512; GFX6-NEXT:    s_mov_b32 s1, s3
513; GFX6-NEXT:    s_mov_b32 s3, 0xf000
514; GFX6-NEXT:    s_mov_b32 s2, s4
515; GFX6-NEXT:    v_mov_b32_e32 v1, s5
516; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
517; GFX6-NEXT:    s_waitcnt vmcnt(0)
518; GFX6-NEXT:    ; return to shader part epilog
519;
520; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
521; GFX7:       ; %bb.0:
522; GFX7-NEXT:    s_mov_b32 s4, 0
523; GFX7-NEXT:    s_mov_b32 s5, 4
524; GFX7-NEXT:    v_mov_b32_e32 v0, s4
525; GFX7-NEXT:    s_mov_b32 s0, s2
526; GFX7-NEXT:    s_mov_b32 s1, s3
527; GFX7-NEXT:    s_mov_b32 s3, 0xf000
528; GFX7-NEXT:    s_mov_b32 s2, s4
529; GFX7-NEXT:    v_mov_b32_e32 v1, s5
530; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
531; GFX7-NEXT:    s_waitcnt vmcnt(0)
532; GFX7-NEXT:    ; return to shader part epilog
533  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
534  %val = load volatile float, float addrspace(1)* %gep
535  ret float %val
536}
537
; Volatile float load at element index 4294967297 (2^32 + 1) from an inreg
; (SGPR) global pointer. The byte offset is 0x400000004, so both 32-bit halves
; equal 4: the expected output sets s4 to 4, copies it to s5, and moves the
; pair to v[0:1] for an addr64 load.
538define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) {
539; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
540; GFX6:       ; %bb.0:
541; GFX6-NEXT:    s_mov_b32 s4, 4
542; GFX6-NEXT:    s_mov_b32 s5, s4
543; GFX6-NEXT:    v_mov_b32_e32 v0, s4
544; GFX6-NEXT:    s_mov_b32 s0, s2
545; GFX6-NEXT:    s_mov_b32 s1, s3
546; GFX6-NEXT:    s_mov_b32 s2, 0
547; GFX6-NEXT:    s_mov_b32 s3, 0xf000
548; GFX6-NEXT:    v_mov_b32_e32 v1, s5
549; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
550; GFX6-NEXT:    s_waitcnt vmcnt(0)
551; GFX6-NEXT:    ; return to shader part epilog
552;
553; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
554; GFX7:       ; %bb.0:
555; GFX7-NEXT:    s_mov_b32 s4, 4
556; GFX7-NEXT:    s_mov_b32 s5, s4
557; GFX7-NEXT:    v_mov_b32_e32 v0, s4
558; GFX7-NEXT:    s_mov_b32 s0, s2
559; GFX7-NEXT:    s_mov_b32 s1, s3
560; GFX7-NEXT:    s_mov_b32 s2, 0
561; GFX7-NEXT:    s_mov_b32 s3, 0xf000
562; GFX7-NEXT:    v_mov_b32_e32 v1, s5
563; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
564; GFX7-NEXT:    s_waitcnt vmcnt(0)
565; GFX7-NEXT:    ; return to shader part epilog
566  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
567  %val = load volatile float, float addrspace(1)* %gep
568  ret float %val
569}
570
; Volatile float load at element index 4096 from an inreg (SGPR) global
; pointer. The byte offset is 4096 * 4 = 0x4000; the expected output loads it
; into s4 with s_movk_i32 and passes it as the soffset operand.
571define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) {
572; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096:
573; GFX6:       ; %bb.0:
574; GFX6-NEXT:    s_mov_b32 s0, s2
575; GFX6-NEXT:    s_mov_b32 s1, s3
576; GFX6-NEXT:    s_mov_b32 s2, -1
577; GFX6-NEXT:    s_mov_b32 s3, 0xf000
578; GFX6-NEXT:    s_movk_i32 s4, 0x4000
579; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
580; GFX6-NEXT:    s_waitcnt vmcnt(0)
581; GFX6-NEXT:    ; return to shader part epilog
582;
583; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4096:
584; GFX7:       ; %bb.0:
585; GFX7-NEXT:    s_mov_b32 s0, s2
586; GFX7-NEXT:    s_mov_b32 s1, s3
587; GFX7-NEXT:    s_mov_b32 s2, -1
588; GFX7-NEXT:    s_mov_b32 s3, 0xf000
589; GFX7-NEXT:    s_movk_i32 s4, 0x4000
590; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
591; GFX7-NEXT:    s_waitcnt vmcnt(0)
592; GFX7-NEXT:    ; return to shader part epilog
593  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
594  %val = load volatile float, float addrspace(1)* %gep
595  ret float %val
596}
597
; Volatile float load at element index 4095 from a non-inreg (VGPR) global
; pointer. Expected output: the pointer stays in v[0:1] (addr64 form), s[0:1]
; is zeroed, and the byte offset 4095 * 4 = 0x3ffc is passed in s4 as soffset.
598define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) {
599; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095:
600; GFX6:       ; %bb.0:
601; GFX6-NEXT:    s_mov_b32 s2, 0
602; GFX6-NEXT:    s_mov_b32 s3, 0xf000
603; GFX6-NEXT:    s_mov_b64 s[0:1], 0
604; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
605; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
606; GFX6-NEXT:    s_waitcnt vmcnt(0)
607; GFX6-NEXT:    ; return to shader part epilog
608;
609; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4095:
610; GFX7:       ; %bb.0:
611; GFX7-NEXT:    s_mov_b32 s2, 0
612; GFX7-NEXT:    s_mov_b32 s3, 0xf000
613; GFX7-NEXT:    s_mov_b64 s[0:1], 0
614; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
615; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
616; GFX7-NEXT:    s_waitcnt vmcnt(0)
617; GFX7-NEXT:    ; return to shader part epilog
618  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
619  %val = load volatile float, float addrspace(1)* %gep
620  ret float %val
621}
622
; Volatile float load at element index 4294967296 (2^32) from a VGPR global
; pointer. Expected output: the 64-bit byte offset 0x400000000 is placed in
; s[0:1] (low half 0, high half 4) while the pointer stays in v[0:1] for the
; addr64 load; s2 reuses the zero already in s0.
623define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) {
624; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
625; GFX6:       ; %bb.0:
626; GFX6-NEXT:    s_mov_b32 s0, 0
627; GFX6-NEXT:    s_mov_b32 s1, 4
628; GFX6-NEXT:    s_mov_b32 s3, 0xf000
629; GFX6-NEXT:    s_mov_b32 s2, s0
630; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
631; GFX6-NEXT:    s_waitcnt vmcnt(0)
632; GFX6-NEXT:    ; return to shader part epilog
633;
634; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
635; GFX7:       ; %bb.0:
636; GFX7-NEXT:    s_mov_b32 s0, 0
637; GFX7-NEXT:    s_mov_b32 s1, 4
638; GFX7-NEXT:    s_mov_b32 s3, 0xf000
639; GFX7-NEXT:    s_mov_b32 s2, s0
640; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
641; GFX7-NEXT:    s_waitcnt vmcnt(0)
642; GFX7-NEXT:    ; return to shader part epilog
643  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
644  %val = load volatile float, float addrspace(1)* %gep
645  ret float %val
646}
647
; Volatile float load at element index 4294967297 (2^32 + 1) from a VGPR
; global pointer. Expected output: the byte offset 0x400000004 is placed in
; s[0:1] (s0 = 4, s1 copied from s0) and the pointer stays in v[0:1] for the
; addr64 load.
648define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) {
649; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
650; GFX6:       ; %bb.0:
651; GFX6-NEXT:    s_mov_b32 s0, 4
652; GFX6-NEXT:    s_mov_b32 s1, s0
653; GFX6-NEXT:    s_mov_b32 s2, 0
654; GFX6-NEXT:    s_mov_b32 s3, 0xf000
655; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
656; GFX6-NEXT:    s_waitcnt vmcnt(0)
657; GFX6-NEXT:    ; return to shader part epilog
658;
659; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
660; GFX7:       ; %bb.0:
661; GFX7-NEXT:    s_mov_b32 s0, 4
662; GFX7-NEXT:    s_mov_b32 s1, s0
663; GFX7-NEXT:    s_mov_b32 s2, 0
664; GFX7-NEXT:    s_mov_b32 s3, 0xf000
665; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
666; GFX7-NEXT:    s_waitcnt vmcnt(0)
667; GFX7-NEXT:    ; return to shader part epilog
668  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
669  %val = load volatile float, float addrspace(1)* %gep
670  ret float %val
671}
672
; Volatile float load at element index 4096 from a VGPR global pointer.
; Expected output: the pointer stays in v[0:1] (addr64 form), s[0:1] is zeroed,
; and the byte offset 4096 * 4 = 0x4000 is passed in s4 as soffset.
673define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) {
674; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096:
675; GFX6:       ; %bb.0:
676; GFX6-NEXT:    s_mov_b32 s2, 0
677; GFX6-NEXT:    s_mov_b32 s3, 0xf000
678; GFX6-NEXT:    s_mov_b64 s[0:1], 0
679; GFX6-NEXT:    s_movk_i32 s4, 0x4000
680; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
681; GFX6-NEXT:    s_waitcnt vmcnt(0)
682; GFX6-NEXT:    ; return to shader part epilog
683;
684; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4096:
685; GFX7:       ; %bb.0:
686; GFX7-NEXT:    s_mov_b32 s2, 0
687; GFX7-NEXT:    s_mov_b32 s3, 0xf000
688; GFX7-NEXT:    s_mov_b64 s[0:1], 0
689; GFX7-NEXT:    s_movk_i32 s4, 0x4000
690; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
691; GFX7-NEXT:    s_waitcnt vmcnt(0)
692; GFX7-NEXT:    ; return to shader part epilog
693  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
694  %val = load volatile float, float addrspace(1)* %gep
695  ret float %val
696}
697
; Volatile float load at a variable i32 element index, with both the pointer
; and the index passed inreg (SGPR). Expected output: the index is widened to
; 64 bits with s_bfe_i64, scaled by 4 with s_lshl_b64 ..., 2, then copied to
; v[0:1] for an addr64 load whose s[0:1] holds the pointer.
698define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) {
699; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
700; GFX6:       ; %bb.0:
701; GFX6-NEXT:    s_mov_b32 s0, s2
702; GFX6-NEXT:    s_mov_b32 s1, s3
703; GFX6-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
704; GFX6-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
705; GFX6-NEXT:    v_mov_b32_e32 v0, s4
706; GFX6-NEXT:    s_mov_b32 s2, 0
707; GFX6-NEXT:    s_mov_b32 s3, 0xf000
708; GFX6-NEXT:    v_mov_b32_e32 v1, s5
709; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
710; GFX6-NEXT:    s_waitcnt vmcnt(0)
711; GFX6-NEXT:    ; return to shader part epilog
712;
713; GFX7-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
714; GFX7:       ; %bb.0:
715; GFX7-NEXT:    s_mov_b32 s0, s2
716; GFX7-NEXT:    s_mov_b32 s1, s3
717; GFX7-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
718; GFX7-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
719; GFX7-NEXT:    v_mov_b32_e32 v0, s4
720; GFX7-NEXT:    s_mov_b32 s2, 0
721; GFX7-NEXT:    s_mov_b32 s3, 0xf000
722; GFX7-NEXT:    v_mov_b32_e32 v1, s5
723; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
724; GFX7-NEXT:    s_waitcnt vmcnt(0)
725; GFX7-NEXT:    ; return to shader part epilog
726  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
727  %val = load volatile float, float addrspace(1)* %gep
728  ret float %val
729}
730
; Volatile float load at a variable inreg (SGPR) i32 element index from a VGPR
; global pointer. Expected output: the index is widened with s_bfe_i64 and
; scaled by 4 with s_lshl_b64 directly into s[0:1], while the pointer stays in
; v[0:1] for the addr64 load.
731define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
732; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
733; GFX6:       ; %bb.0:
734; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
735; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
736; GFX6-NEXT:    s_mov_b32 s2, 0
737; GFX6-NEXT:    s_mov_b32 s3, 0xf000
738; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
739; GFX6-NEXT:    s_waitcnt vmcnt(0)
740; GFX6-NEXT:    ; return to shader part epilog
741;
742; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
743; GFX7:       ; %bb.0:
744; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
745; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
746; GFX7-NEXT:    s_mov_b32 s2, 0
747; GFX7-NEXT:    s_mov_b32 s3, 0xf000
748; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
749; GFX7-NEXT:    s_waitcnt vmcnt(0)
750; GFX7-NEXT:    ; return to shader part epilog
751  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
752  %val = load volatile float, float addrspace(1)* %gep
753  ret float %val
754}
755
; Volatile float load from a VGPR global pointer indexed first by a variable
; inreg (SGPR) i32 index and then by a constant 256 elements. Expected output:
; the scaled variable index goes into s[0:1] and the constant part,
; 256 * 4 = 1024 bytes, appears in the load's immediate offset field.
756define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) {
757; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
758; GFX6:       ; %bb.0:
759; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
760; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
761; GFX6-NEXT:    s_mov_b32 s2, 0
762; GFX6-NEXT:    s_mov_b32 s3, 0xf000
763; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
764; GFX6-NEXT:    s_waitcnt vmcnt(0)
765; GFX6-NEXT:    ; return to shader part epilog
766;
767; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
768; GFX7:       ; %bb.0:
769; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
770; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
771; GFX7-NEXT:    s_mov_b32 s2, 0
772; GFX7-NEXT:    s_mov_b32 s3, 0xf000
773; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
774; GFX7-NEXT:    s_waitcnt vmcnt(0)
775; GFX7-NEXT:    ; return to shader part epilog
776  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
777  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256
778  %val = load volatile float, float addrspace(1)* %gep1
779  ret float %val
780}
781
; Volatile float load from a VGPR global pointer indexed first by a constant
; 256 elements (written here as an i64 index) and then by a variable inreg
; (SGPR) i32 index. Expected output: the scaled variable index goes into s[0:1]
; and the constant part, 256 * 4 = 1024 bytes, still appears in the load's
; immediate offset field.
782define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
783; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
784; GFX6:       ; %bb.0:
785; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
786; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
787; GFX6-NEXT:    s_mov_b32 s2, 0
788; GFX6-NEXT:    s_mov_b32 s3, 0xf000
789; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
790; GFX6-NEXT:    s_waitcnt vmcnt(0)
791; GFX6-NEXT:    ; return to shader part epilog
792;
793; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
794; GFX7:       ; %bb.0:
795; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
796; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
797; GFX7-NEXT:    s_mov_b32 s2, 0
798; GFX7-NEXT:    s_mov_b32 s3, 0xf000
799; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
800; GFX7-NEXT:    s_waitcnt vmcnt(0)
801; GFX7-NEXT:    ; return to shader part epilog
802  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
803  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset
804  %val = load volatile float, float addrspace(1)* %gep1
805  ret float %val
806}
807
; Volatile float load through an SGPR pointer (%ptr is inreg) indexed by a
; VGPR i32 (%voffset). Both check prefixes expect the index to be widened
; into v[0:1] (high half produced by an arithmetic shift right by 31), then
; shifted left by 2 and used as the addr64 operand, while the pointer is
; copied into s[0:1] of the resource. No immediate offset is expected.
808define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
809; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
810; GFX6:       ; %bb.0:
811; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
812; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
813; GFX6-NEXT:    s_mov_b32 s0, s2
814; GFX6-NEXT:    s_mov_b32 s1, s3
815; GFX6-NEXT:    s_mov_b32 s2, 0
816; GFX6-NEXT:    s_mov_b32 s3, 0xf000
817; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
818; GFX6-NEXT:    s_waitcnt vmcnt(0)
819; GFX6-NEXT:    ; return to shader part epilog
820;
821; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
822; GFX7:       ; %bb.0:
823; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
824; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
825; GFX7-NEXT:    s_mov_b32 s0, s2
826; GFX7-NEXT:    s_mov_b32 s1, s3
827; GFX7-NEXT:    s_mov_b32 s2, 0
828; GFX7-NEXT:    s_mov_b32 s3, 0xf000
829; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
830; GFX7-NEXT:    s_waitcnt vmcnt(0)
831; GFX7-NEXT:    ; return to shader part epilog
832  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
833  %val = load volatile float, float addrspace(1)* %gep
834  ret float %val
835}
836
; Volatile float load through an SGPR pointer indexed by a VGPR i32
; (%voffset) and then by a constant 4095 elements. Note that 4095 is an
; element count: the byte offset is 4095 * 4 = 16380 = 0x3ffc. Both check
; prefixes expect that constant to be materialized in s4 and passed as the
; scalar offset operand (no offset: immediate appears), with the scaled
; index in v[0:1] as the addr64 operand and the pointer in s[0:1].
837define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) {
838; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
839; GFX6:       ; %bb.0:
840; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
841; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
842; GFX6-NEXT:    s_mov_b32 s0, s2
843; GFX6-NEXT:    s_mov_b32 s1, s3
844; GFX6-NEXT:    s_mov_b32 s2, 0
845; GFX6-NEXT:    s_mov_b32 s3, 0xf000
846; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
847; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
848; GFX6-NEXT:    s_waitcnt vmcnt(0)
849; GFX6-NEXT:    ; return to shader part epilog
850;
851; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
852; GFX7:       ; %bb.0:
853; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
854; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
855; GFX7-NEXT:    s_mov_b32 s0, s2
856; GFX7-NEXT:    s_mov_b32 s1, s3
857; GFX7-NEXT:    s_mov_b32 s2, 0
858; GFX7-NEXT:    s_mov_b32 s3, 0xf000
859; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
860; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
861; GFX7-NEXT:    s_waitcnt vmcnt(0)
862; GFX7-NEXT:    ; return to shader part epilog
863  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
864  %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095
865  %val = load volatile float, float addrspace(1)* %gep1
866  ret float %val
867}
; Volatile float load through an SGPR pointer where the constant
; 4095-element GEP (4095 * 4 = 0x3ffc bytes) is applied first and the VGPR
; i32 index (%voffset) second. Both check prefixes expect the constant in
; s4 as the scalar offset operand, the scaled index in v[0:1] as the addr64
; operand, and the pointer copied into s[0:1] of the resource.
868define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
869; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
870; GFX6:       ; %bb.0:
871; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
872; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
873; GFX6-NEXT:    s_mov_b32 s0, s2
874; GFX6-NEXT:    s_mov_b32 s1, s3
875; GFX6-NEXT:    s_mov_b32 s2, 0
876; GFX6-NEXT:    s_mov_b32 s3, 0xf000
877; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
878; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
879; GFX6-NEXT:    s_waitcnt vmcnt(0)
880; GFX6-NEXT:    ; return to shader part epilog
881;
882; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
883; GFX7:       ; %bb.0:
884; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
885; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
886; GFX7-NEXT:    s_mov_b32 s0, s2
887; GFX7-NEXT:    s_mov_b32 s1, s3
888; GFX7-NEXT:    s_mov_b32 s2, 0
889; GFX7-NEXT:    s_mov_b32 s3, 0xf000
890; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
891; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
892; GFX7-NEXT:    s_waitcnt vmcnt(0)
893; GFX7-NEXT:    ; return to shader part epilog
894  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
895  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %voffset
896  %val = load volatile float, float addrspace(1)* %gep1
897  ret float %val
898}
899
; seq_cst atomicrmw add of the constant 2 at an SGPR pointer plus a constant
; 4095 i32 elements (4095 * 4 = 0x3ffc bytes); the returned i32 is bitcast
; to float for the shader return. Both check prefixes expect the pointer in
; s[0:1], the byte offset in s4 as the scalar offset operand, and a
; buffer_atomic_add with "off" addressing and glc. The atomic is bracketed
; by s_waitcnt instructions and followed by buffer_wbinvl1.
; NOTE(review): the waits and cache invalidate are presumably emitted for
; the seq_cst ordering - confirm against the AMDGPU memory model.
900define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
901; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
902; GFX6:       ; %bb.0:
903; GFX6-NEXT:    s_mov_b32 s0, s2
904; GFX6-NEXT:    s_mov_b32 s1, s3
905; GFX6-NEXT:    v_mov_b32_e32 v0, 2
906; GFX6-NEXT:    s_mov_b32 s2, -1
907; GFX6-NEXT:    s_mov_b32 s3, 0xf000
908; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
909; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
910; GFX6-NEXT:    buffer_atomic_add v0, off, s[0:3], s4 glc
911; GFX6-NEXT:    s_waitcnt vmcnt(0)
912; GFX6-NEXT:    buffer_wbinvl1
913; GFX6-NEXT:    s_waitcnt expcnt(0)
914; GFX6-NEXT:    ; return to shader part epilog
915;
916; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
917; GFX7:       ; %bb.0:
918; GFX7-NEXT:    s_mov_b32 s0, s2
919; GFX7-NEXT:    s_mov_b32 s1, s3
920; GFX7-NEXT:    v_mov_b32_e32 v0, 2
921; GFX7-NEXT:    s_mov_b32 s2, -1
922; GFX7-NEXT:    s_mov_b32 s3, 0xf000
923; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
924; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
925; GFX7-NEXT:    buffer_atomic_add v0, off, s[0:3], s4 glc
926; GFX7-NEXT:    s_waitcnt vmcnt(0)
927; GFX7-NEXT:    buffer_wbinvl1
928; GFX7-NEXT:    ; return to shader part epilog
929  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
930  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
931  %cast = bitcast i32 %result to float
932  ret float %cast
933}
934
; seq_cst atomicrmw add of the constant 2 at an SGPR pointer plus a constant
; 4294967296 i32 elements, i.e. a 64-bit byte offset whose low dword is 0
; and high dword is 4. Both check prefixes expect that 64-bit constant to
; be built in s[4:5], copied into v[1:2] and used as the addr64 operand,
; with the pointer in s[0:1] and a zero scalar offset. The result is
; returned in v0 after a bitcast to float.
935define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
936; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
937; GFX6:       ; %bb.0:
938; GFX6-NEXT:    s_mov_b32 s4, 0
939; GFX6-NEXT:    s_mov_b32 s5, 4
940; GFX6-NEXT:    v_mov_b32_e32 v1, s4
941; GFX6-NEXT:    s_mov_b32 s0, s2
942; GFX6-NEXT:    s_mov_b32 s1, s3
943; GFX6-NEXT:    v_mov_b32_e32 v0, 2
944; GFX6-NEXT:    s_mov_b32 s3, 0xf000
945; GFX6-NEXT:    s_mov_b32 s2, s4
946; GFX6-NEXT:    v_mov_b32_e32 v2, s5
947; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
948; GFX6-NEXT:    buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
949; GFX6-NEXT:    s_waitcnt vmcnt(0)
950; GFX6-NEXT:    buffer_wbinvl1
951; GFX6-NEXT:    s_waitcnt expcnt(0)
952; GFX6-NEXT:    ; return to shader part epilog
953;
954; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
955; GFX7:       ; %bb.0:
956; GFX7-NEXT:    s_mov_b32 s4, 0
957; GFX7-NEXT:    s_mov_b32 s5, 4
958; GFX7-NEXT:    v_mov_b32_e32 v1, s4
959; GFX7-NEXT:    s_mov_b32 s0, s2
960; GFX7-NEXT:    s_mov_b32 s1, s3
961; GFX7-NEXT:    v_mov_b32_e32 v0, 2
962; GFX7-NEXT:    s_mov_b32 s3, 0xf000
963; GFX7-NEXT:    s_mov_b32 s2, s4
964; GFX7-NEXT:    v_mov_b32_e32 v2, s5
965; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
966; GFX7-NEXT:    buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
967; GFX7-NEXT:    s_waitcnt vmcnt(0)
968; GFX7-NEXT:    buffer_wbinvl1
969; GFX7-NEXT:    ; return to shader part epilog
970  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
971  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
972  %cast = bitcast i32 %result to float
973  ret float %cast
974}
975
; seq_cst atomicrmw add of the constant 2 at a VGPR pointer plus a constant
; 4095 i32 elements (4095 * 4 = 0x3ffc bytes). Both check prefixes expect a
; zero base in s[0:1], the pointer v[0:1] as the addr64 operand, and the
; byte offset in s4 as the scalar offset operand. The atomic operates on v2
; (initialized to 2), which is then copied to v0 for the return.
976define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
977; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
978; GFX6:       ; %bb.0:
979; GFX6-NEXT:    v_mov_b32_e32 v2, 2
980; GFX6-NEXT:    s_mov_b32 s2, 0
981; GFX6-NEXT:    s_mov_b32 s3, 0xf000
982; GFX6-NEXT:    s_mov_b64 s[0:1], 0
983; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
984; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
985; GFX6-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
986; GFX6-NEXT:    s_waitcnt vmcnt(0)
987; GFX6-NEXT:    buffer_wbinvl1
988; GFX6-NEXT:    v_mov_b32_e32 v0, v2
989; GFX6-NEXT:    s_waitcnt expcnt(0)
990; GFX6-NEXT:    ; return to shader part epilog
991;
992; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
993; GFX7:       ; %bb.0:
994; GFX7-NEXT:    v_mov_b32_e32 v2, 2
995; GFX7-NEXT:    s_mov_b32 s2, 0
996; GFX7-NEXT:    s_mov_b32 s3, 0xf000
997; GFX7-NEXT:    s_mov_b64 s[0:1], 0
998; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
999; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1000; GFX7-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
1001; GFX7-NEXT:    s_waitcnt vmcnt(0)
1002; GFX7-NEXT:    buffer_wbinvl1
1003; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1004; GFX7-NEXT:    ; return to shader part epilog
1005  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
1006  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
1007  %cast = bitcast i32 %result to float
1008  ret float %cast
1009}
1010
; seq_cst atomicrmw add of the constant 2 at a VGPR pointer plus a constant
; 4294967296 i32 elements, i.e. a 64-bit byte offset whose low dword is 0
; and high dword is 4. Both check prefixes expect that constant to be
; placed in s[0:1] of the resource (s0 = 0, s1 = 4), the pointer v[0:1] as
; the addr64 operand, and a zero scalar offset. The atomic operates on v2,
; which is copied to v0 for the return.
1011define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
1012; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
1013; GFX6:       ; %bb.0:
1014; GFX6-NEXT:    s_mov_b32 s0, 0
1015; GFX6-NEXT:    s_mov_b32 s1, 4
1016; GFX6-NEXT:    v_mov_b32_e32 v2, 2
1017; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1018; GFX6-NEXT:    s_mov_b32 s2, s0
1019; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1020; GFX6-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1021; GFX6-NEXT:    s_waitcnt vmcnt(0)
1022; GFX6-NEXT:    buffer_wbinvl1
1023; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1024; GFX6-NEXT:    s_waitcnt expcnt(0)
1025; GFX6-NEXT:    ; return to shader part epilog
1026;
1027; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
1028; GFX7:       ; %bb.0:
1029; GFX7-NEXT:    s_mov_b32 s0, 0
1030; GFX7-NEXT:    s_mov_b32 s1, 4
1031; GFX7-NEXT:    v_mov_b32_e32 v2, 2
1032; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1033; GFX7-NEXT:    s_mov_b32 s2, s0
1034; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1035; GFX7-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1036; GFX7-NEXT:    s_waitcnt vmcnt(0)
1037; GFX7-NEXT:    buffer_wbinvl1
1038; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1039; GFX7-NEXT:    ; return to shader part epilog
1040  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
1041  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
1042  %cast = bitcast i32 %result to float
1043  ret float %cast
1044}
1045
; seq_cst atomicrmw add of the constant 2 at an SGPR pointer indexed by a
; VGPR i32 (%voffset). Both check prefixes expect the index to be widened
; into v[0:1], shifted left by 2 and used as the addr64 operand, with the
; pointer copied into s[0:1] and a zero scalar offset. The atomic operates
; on v2 (initialized to 2), which is copied to v0 for the return.
1046define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
1047; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
1048; GFX6:       ; %bb.0:
1049; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1050; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1051; GFX6-NEXT:    s_mov_b32 s0, s2
1052; GFX6-NEXT:    s_mov_b32 s1, s3
1053; GFX6-NEXT:    v_mov_b32_e32 v2, 2
1054; GFX6-NEXT:    s_mov_b32 s2, 0
1055; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1056; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1057; GFX6-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1058; GFX6-NEXT:    s_waitcnt vmcnt(0)
1059; GFX6-NEXT:    buffer_wbinvl1
1060; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1061; GFX6-NEXT:    s_waitcnt expcnt(0)
1062; GFX6-NEXT:    ; return to shader part epilog
1063;
1064; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
1065; GFX7:       ; %bb.0:
1066; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1067; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1068; GFX7-NEXT:    s_mov_b32 s0, s2
1069; GFX7-NEXT:    s_mov_b32 s1, s3
1070; GFX7-NEXT:    v_mov_b32_e32 v2, 2
1071; GFX7-NEXT:    s_mov_b32 s2, 0
1072; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1073; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1074; GFX7-NEXT:    buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
1075; GFX7-NEXT:    s_waitcnt vmcnt(0)
1076; GFX7-NEXT:    buffer_wbinvl1
1077; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1078; GFX7-NEXT:    ; return to shader part epilog
1079  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
1080  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
1081  %cast = bitcast i32 %result to float
1082  ret float %cast
1083}
1084
; seq_cst/seq_cst cmpxchg at an SGPR pointer plus a constant 4095 i32
; elements (4095 * 4 = 0x3ffc bytes); element 0 of the result struct (the
; loaded value) is bitcast to float and returned. %old arrives in v0 and
; %in in v1; both check prefixes expect %old to be copied to v2 so that the
; data operand is the pair v[1:2], the byte offset to be passed in s4 with
; "off" addressing, and the result to be copied from v1 to v0.
1085define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) {
1086; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095:
1087; GFX6:       ; %bb.0:
1088; GFX6-NEXT:    s_mov_b32 s0, s2
1089; GFX6-NEXT:    s_mov_b32 s1, s3
1090; GFX6-NEXT:    v_mov_b32_e32 v2, v0
1091; GFX6-NEXT:    s_mov_b32 s2, -1
1092; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1093; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
1094; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1095; GFX6-NEXT:    buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
1096; GFX6-NEXT:    s_waitcnt vmcnt(0)
1097; GFX6-NEXT:    buffer_wbinvl1
1098; GFX6-NEXT:    v_mov_b32_e32 v0, v1
1099; GFX6-NEXT:    s_waitcnt expcnt(0)
1100; GFX6-NEXT:    ; return to shader part epilog
1101;
1102; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095:
1103; GFX7:       ; %bb.0:
1104; GFX7-NEXT:    s_mov_b32 s0, s2
1105; GFX7-NEXT:    s_mov_b32 s1, s3
1106; GFX7-NEXT:    v_mov_b32_e32 v2, v0
1107; GFX7-NEXT:    s_mov_b32 s2, -1
1108; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1109; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
1110; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1111; GFX7-NEXT:    buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc
1112; GFX7-NEXT:    s_waitcnt vmcnt(0)
1113; GFX7-NEXT:    buffer_wbinvl1
1114; GFX7-NEXT:    v_mov_b32_e32 v0, v1
1115; GFX7-NEXT:    ; return to shader part epilog
1116  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
1117  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1118  %result = extractvalue { i32, i1 } %result.struct, 0
1119  %cast = bitcast i32 %result to float
1120  ret float %cast
1121}
1122
; seq_cst/seq_cst cmpxchg at an SGPR pointer plus a constant 4294967296 i32
; elements, i.e. a 64-bit byte offset whose low dword is 0 and high dword
; is 4. Both check prefixes expect that constant to be built in s[4:5],
; copied into v[3:4] and used as the addr64 operand, with the pointer in
; s[0:1]. %old (v0) is copied to v2 so the data operand is v[1:2]; the
; loaded value comes back in v1 and is copied to v0 for the return.
1123define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) {
1124; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
1125; GFX6:       ; %bb.0:
1126; GFX6-NEXT:    s_mov_b32 s4, 0
1127; GFX6-NEXT:    s_mov_b32 s5, 4
1128; GFX6-NEXT:    v_mov_b32_e32 v3, s4
1129; GFX6-NEXT:    s_mov_b32 s0, s2
1130; GFX6-NEXT:    s_mov_b32 s1, s3
1131; GFX6-NEXT:    v_mov_b32_e32 v2, v0
1132; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1133; GFX6-NEXT:    s_mov_b32 s2, s4
1134; GFX6-NEXT:    v_mov_b32_e32 v4, s5
1135; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1136; GFX6-NEXT:    buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
1137; GFX6-NEXT:    s_waitcnt vmcnt(0)
1138; GFX6-NEXT:    buffer_wbinvl1
1139; GFX6-NEXT:    v_mov_b32_e32 v0, v1
1140; GFX6-NEXT:    s_waitcnt expcnt(0)
1141; GFX6-NEXT:    ; return to shader part epilog
1142;
1143; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296:
1144; GFX7:       ; %bb.0:
1145; GFX7-NEXT:    s_mov_b32 s4, 0
1146; GFX7-NEXT:    s_mov_b32 s5, 4
1147; GFX7-NEXT:    v_mov_b32_e32 v3, s4
1148; GFX7-NEXT:    s_mov_b32 s0, s2
1149; GFX7-NEXT:    s_mov_b32 s1, s3
1150; GFX7-NEXT:    v_mov_b32_e32 v2, v0
1151; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1152; GFX7-NEXT:    s_mov_b32 s2, s4
1153; GFX7-NEXT:    v_mov_b32_e32 v4, s5
1154; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1155; GFX7-NEXT:    buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc
1156; GFX7-NEXT:    s_waitcnt vmcnt(0)
1157; GFX7-NEXT:    buffer_wbinvl1
1158; GFX7-NEXT:    v_mov_b32_e32 v0, v1
1159; GFX7-NEXT:    ; return to shader part epilog
1160  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
1161  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1162  %result = extractvalue { i32, i1 } %result.struct, 0
1163  %cast = bitcast i32 %result to float
1164  ret float %cast
1165}
1166
; seq_cst/seq_cst cmpxchg at a VGPR pointer plus a constant 4095 i32
; elements (4095 * 4 = 0x3ffc bytes). The pointer occupies v[0:1], %old is
; in v2 and %in in v3; both check prefixes expect %old to be copied to v4
; so the data operand is v[3:4], a zero base in s[0:1], the pointer as the
; addr64 operand, and the byte offset in s4 as the scalar offset operand.
; The loaded value comes back in v3 and is copied to v0 for the return.
1167define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) {
1168; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095:
1169; GFX6:       ; %bb.0:
1170; GFX6-NEXT:    v_mov_b32_e32 v4, v2
1171; GFX6-NEXT:    s_mov_b32 s2, 0
1172; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1173; GFX6-NEXT:    s_mov_b64 s[0:1], 0
1174; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
1175; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1176; GFX6-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
1177; GFX6-NEXT:    s_waitcnt vmcnt(0)
1178; GFX6-NEXT:    buffer_wbinvl1
1179; GFX6-NEXT:    v_mov_b32_e32 v0, v3
1180; GFX6-NEXT:    s_waitcnt expcnt(0)
1181; GFX6-NEXT:    ; return to shader part epilog
1182;
1183; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095:
1184; GFX7:       ; %bb.0:
1185; GFX7-NEXT:    v_mov_b32_e32 v4, v2
1186; GFX7-NEXT:    s_mov_b32 s2, 0
1187; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1188; GFX7-NEXT:    s_mov_b64 s[0:1], 0
1189; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
1190; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1191; GFX7-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc
1192; GFX7-NEXT:    s_waitcnt vmcnt(0)
1193; GFX7-NEXT:    buffer_wbinvl1
1194; GFX7-NEXT:    v_mov_b32_e32 v0, v3
1195; GFX7-NEXT:    ; return to shader part epilog
1196  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
1197  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1198  %result = extractvalue { i32, i1 } %result.struct, 0
1199  %cast = bitcast i32 %result to float
1200  ret float %cast
1201}
1202
; seq_cst/seq_cst cmpxchg at a VGPR pointer plus a constant 4294967296 i32
; elements, i.e. a 64-bit byte offset whose low dword is 0 and high dword
; is 4. Both check prefixes expect that constant to be placed in s[0:1] of
; the resource (s0 = 0, s1 = 4), the pointer v[0:1] as the addr64 operand,
; and a zero scalar offset. %old (v2) is copied to v4 so the data operand
; is v[3:4]; the loaded value comes back in v3 and is copied to v0.
1203define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) {
1204; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
1205; GFX6:       ; %bb.0:
1206; GFX6-NEXT:    s_mov_b32 s0, 0
1207; GFX6-NEXT:    v_mov_b32_e32 v4, v2
1208; GFX6-NEXT:    s_mov_b32 s1, 4
1209; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1210; GFX6-NEXT:    s_mov_b32 s2, s0
1211; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1212; GFX6-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
1213; GFX6-NEXT:    s_waitcnt vmcnt(0)
1214; GFX6-NEXT:    buffer_wbinvl1
1215; GFX6-NEXT:    v_mov_b32_e32 v0, v3
1216; GFX6-NEXT:    s_waitcnt expcnt(0)
1217; GFX6-NEXT:    ; return to shader part epilog
1218;
1219; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296:
1220; GFX7:       ; %bb.0:
1221; GFX7-NEXT:    s_mov_b32 s0, 0
1222; GFX7-NEXT:    v_mov_b32_e32 v4, v2
1223; GFX7-NEXT:    s_mov_b32 s1, 4
1224; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1225; GFX7-NEXT:    s_mov_b32 s2, s0
1226; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1227; GFX7-NEXT:    buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc
1228; GFX7-NEXT:    s_waitcnt vmcnt(0)
1229; GFX7-NEXT:    buffer_wbinvl1
1230; GFX7-NEXT:    v_mov_b32_e32 v0, v3
1231; GFX7-NEXT:    ; return to shader part epilog
1232  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
1233  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1234  %result = extractvalue { i32, i1 } %result.struct, 0
1235  %cast = bitcast i32 %result to float
1236  ret float %cast
1237}
1238
; seq_cst/seq_cst cmpxchg at an SGPR pointer indexed by a VGPR i32
; (%voffset). %voffset arrives in v0, %old in v1 and %in in v2; both check
; prefixes expect %old to be copied to v3 first (so the data operand is
; v[2:3]) because v1 is then overwritten with the high half of the widened
; index. The index in v[0:1] is shifted left by 2 and used as the addr64
; operand, the pointer is copied into s[0:1], and the loaded value comes
; back in v2 and is copied to v0 for the return.
1239define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) {
1240; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset:
1241; GFX6:       ; %bb.0:
1242; GFX6-NEXT:    v_mov_b32_e32 v3, v1
1243; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1244; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1245; GFX6-NEXT:    s_mov_b32 s0, s2
1246; GFX6-NEXT:    s_mov_b32 s1, s3
1247; GFX6-NEXT:    s_mov_b32 s2, 0
1248; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1249; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1250; GFX6-NEXT:    buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
1251; GFX6-NEXT:    s_waitcnt vmcnt(0)
1252; GFX6-NEXT:    buffer_wbinvl1
1253; GFX6-NEXT:    v_mov_b32_e32 v0, v2
1254; GFX6-NEXT:    s_waitcnt expcnt(0)
1255; GFX6-NEXT:    ; return to shader part epilog
1256;
1257; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset:
1258; GFX7:       ; %bb.0:
1259; GFX7-NEXT:    v_mov_b32_e32 v3, v1
1260; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1261; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
1262; GFX7-NEXT:    s_mov_b32 s0, s2
1263; GFX7-NEXT:    s_mov_b32 s1, s3
1264; GFX7-NEXT:    s_mov_b32 s2, 0
1265; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1266; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1267; GFX7-NEXT:    buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc
1268; GFX7-NEXT:    s_waitcnt vmcnt(0)
1269; GFX7-NEXT:    buffer_wbinvl1
1270; GFX7-NEXT:    v_mov_b32_e32 v0, v2
1271; GFX7-NEXT:    ; return to shader part epilog
1272  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
1273  %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
1274  %result = extractvalue { i32, i1 } %result.struct, 0
1275  %cast = bitcast i32 %result to float
1276  ret float %cast
1277}
1278