1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
3; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
4; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
5
; Intrinsic called at the top of every kernel in this file; its result is
; negated and used to form the store address.
6declare i32 @llvm.amdgcn.workitem.id.x() #0
7
; 256 x i32 array in addrspace(3). The two write_ds_sub0_offset0_global*
; kernels index into it; stores to it are checked as ds_write instructions.
8@lds.obj = addrspace(3) global [256 x i32] undef, align 4
9
; Stores 123 to element (3 - workitem.id.x) of @lds.obj.
; The checks expect the id to be scaled by 4 (shift left by 2), subtracted
; from 0, and the constant element offset (3 * 4 = 12 bytes) to be carried in
; the ds_write_b32 offset field on all three targets.
; The kernel uses attribute group #1 (nounwind) because it writes memory; the
; side-effect-free intrinsic call uses #0, matching the other kernels here.
10define amdgpu_kernel void @write_ds_sub0_offset0_global() #1 {
11; CI-LABEL: write_ds_sub0_offset0_global:
12; CI:       ; %bb.0: ; %entry
13; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
14; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
15; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
16; CI-NEXT:    s_mov_b32 m0, -1
17; CI-NEXT:    ds_write_b32 v0, v1 offset:12
18; CI-NEXT:    s_endpgm
19;
20; GFX9-LABEL: write_ds_sub0_offset0_global:
21; GFX9:       ; %bb.0: ; %entry
22; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
23; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
24; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
25; GFX9-NEXT:    ds_write_b32 v0, v1 offset:12
26; GFX9-NEXT:    s_endpgm
27;
28; GFX10-LABEL: write_ds_sub0_offset0_global:
29; GFX10:       ; %bb.0: ; %entry
30; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
31; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
32; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
33; GFX10-NEXT:    ds_write_b32 v0, v1 offset:12
34; GFX10-NEXT:    s_endpgm
35entry:
36  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
37  %sub1 = sub i32 0, %x.i
38  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
39  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
40  store i32 123, i32 addrspace(3)* %arrayidx
41  ret void
42}
43
; Same LDS store as write_ds_sub0_offset0_global (123 to element
; 3 - workitem.id.x of @lds.obj, checked as ds_write_b32 offset:12), followed
; by a call to llvm.amdgcn.div.fmas.f32 on %dummy.val with an i1 false
; condition whose result is stored (volatile) to the null global address.
; In the checks for bonaire the subtract names vcc as its carry output, and
; vcc is then set to 0 before v_div_fmas_f32.
; NOTE(review): the "_clamp_bit" suffix is not explained by the visible IR;
; presumably it guards how the subtract is selected while vcc is live -
; confirm against the change that introduced the test.
; The kernel uses attribute group #1 (nounwind) because it writes memory; the
; side-effect-free workitem-id call uses #0, matching the other kernels here.
44define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #1 {
45; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit:
46; CI:       ; %bb.0: ; %entry
47; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
48; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
49; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
50; CI-NEXT:    s_mov_b64 vcc, 0
51; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
52; CI-NEXT:    s_waitcnt lgkmcnt(0)
53; CI-NEXT:    v_mov_b32_e32 v1, s0
54; CI-NEXT:    s_mov_b32 s0, 0
55; CI-NEXT:    v_div_fmas_f32 v1, v1, v1, v1
56; CI-NEXT:    s_mov_b32 m0, -1
57; CI-NEXT:    s_mov_b32 s3, 0xf000
58; CI-NEXT:    s_mov_b32 s2, -1
59; CI-NEXT:    s_mov_b32 s1, s0
60; CI-NEXT:    ds_write_b32 v0, v2 offset:12
61; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
62; CI-NEXT:    s_waitcnt vmcnt(0)
63; CI-NEXT:    s_endpgm
64;
65; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit:
66; GFX9:       ; %bb.0: ; %entry
67; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
68; GFX9-NEXT:    s_mov_b64 vcc, 0
69; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
70; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v0
71; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7b
72; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX9-NEXT:    v_mov_b32_e32 v1, s0
74; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1
75; GFX9-NEXT:    v_mov_b32_e32 v0, 0
76; GFX9-NEXT:    v_mov_b32_e32 v1, 0
77; GFX9-NEXT:    ds_write_b32 v3, v4 offset:12
78; GFX9-NEXT:    global_store_dword v[0:1], v2, off
79; GFX9-NEXT:    s_waitcnt vmcnt(0)
80; GFX9-NEXT:    s_endpgm
81;
82; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit:
83; GFX10:       ; %bb.0: ; %entry
84; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x24
85; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
86; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
87; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
88; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0, v0
89; GFX10-NEXT:    v_mov_b32_e32 v0, 0
90; GFX10-NEXT:    v_mov_b32_e32 v1, 0
91; GFX10-NEXT:    ds_write_b32 v2, v3 offset:12
92; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
93; GFX10-NEXT:    v_div_fmas_f32 v4, s0, s0, s0
94; GFX10-NEXT:    global_store_dword v[0:1], v4, off
95; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
96; GFX10-NEXT:    s_endpgm
97entry:
98  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
99  %sub1 = sub i32 0, %x.i
100  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
101  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
102  store i32 123, i32 addrspace(3)* %arrayidx
103  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
104  store volatile float %fmas, float addrspace(1)* null
105  ret void
106}
107
; Byte store of 13 to the address 65535 + ((0 - workitem.id.x) << 2), formed
; with inttoptr into addrspace(3).
; The checks expect the shifted id to be subtracted from 0 and the constant
; 65535 to be carried in the ds_write_b8 offset field on all three targets.
108define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 {
109; CI-LABEL: add_x_shl_neg_to_sub_max_offset:
110; CI:       ; %bb.0:
111; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
112; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
113; CI-NEXT:    v_mov_b32_e32 v1, 13
114; CI-NEXT:    s_mov_b32 m0, -1
115; CI-NEXT:    ds_write_b8 v0, v1 offset:65535
116; CI-NEXT:    s_endpgm
117;
118; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset:
119; GFX9:       ; %bb.0:
120; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
121; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
122; GFX9-NEXT:    v_mov_b32_e32 v1, 13
123; GFX9-NEXT:    ds_write_b8 v0, v1 offset:65535
124; GFX9-NEXT:    s_endpgm
125;
126; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset:
127; GFX10:       ; %bb.0:
128; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
129; GFX10-NEXT:    v_mov_b32_e32 v1, 13
130; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
131; GFX10-NEXT:    ds_write_b8 v0, v1 offset:65535
132; GFX10-NEXT:    s_endpgm
133  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
134  %neg = sub i32 0, %x.i
135  %shl = shl i32 %neg, 2
136  %add = add i32 65535, %shl
137  %ptr = inttoptr i32 %add to i8 addrspace(3)*
138  store i8 13, i8 addrspace(3)* %ptr
139  ret void
140}
141
; Byte store of 13 to the address 65536 + ((0 - workitem.id.x) << 2), i.e. the
; constant is one larger than in add_x_shl_neg_to_sub_max_offset.
; Here the checks expect the constant to appear as the subtract operand
; (0x10000) and the ds_write_b8 to carry no offset.
142define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
143; CI-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
144; CI:       ; %bb.0:
145; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
146; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x10000, v0
147; CI-NEXT:    v_mov_b32_e32 v1, 13
148; CI-NEXT:    s_mov_b32 m0, -1
149; CI-NEXT:    ds_write_b8 v0, v1
150; CI-NEXT:    s_endpgm
151;
152; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
153; GFX9:       ; %bb.0:
154; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
155; GFX9-NEXT:    v_sub_u32_e32 v0, 0x10000, v0
156; GFX9-NEXT:    v_mov_b32_e32 v1, 13
157; GFX9-NEXT:    ds_write_b8 v0, v1
158; GFX9-NEXT:    s_endpgm
159;
160; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
161; GFX10:       ; %bb.0:
162; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
163; GFX10-NEXT:    v_mov_b32_e32 v1, 13
164; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x10000, v0
165; GFX10-NEXT:    ds_write_b8 v0, v1
166; GFX10-NEXT:    s_endpgm
167  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
168  %neg = sub i32 0, %x.i
169  %shl = shl i32 %neg, 2
170  %add = add i32 65536, %shl
171  %ptr = inttoptr i32 %add to i8 addrspace(3)*
172  store i8 13, i8 addrspace(3)* %ptr
173  ret void
174}
175
; Two volatile i32 stores of 13 whose addresses share one
; ((0 - workitem.id.x) << 2) value and differ only in the added constant
; (123 and 456).
; The checks expect a single shift and subtract-from-0, with each constant
; carried in the offset field of its own ds_write_b32.
176define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
177; CI-LABEL: add_x_shl_neg_to_sub_multi_use:
178; CI:       ; %bb.0:
179; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
180; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
181; CI-NEXT:    v_mov_b32_e32 v1, 13
182; CI-NEXT:    s_mov_b32 m0, -1
183; CI-NEXT:    ds_write_b32 v0, v1 offset:123
184; CI-NEXT:    ds_write_b32 v0, v1 offset:456
185; CI-NEXT:    s_endpgm
186;
187; GFX9-LABEL: add_x_shl_neg_to_sub_multi_use:
188; GFX9:       ; %bb.0:
189; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
190; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
191; GFX9-NEXT:    v_mov_b32_e32 v1, 13
192; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
193; GFX9-NEXT:    ds_write_b32 v0, v1 offset:456
194; GFX9-NEXT:    s_endpgm
195;
196; GFX10-LABEL: add_x_shl_neg_to_sub_multi_use:
197; GFX10:       ; %bb.0:
198; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
199; GFX10-NEXT:    v_mov_b32_e32 v1, 13
200; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
201; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
202; GFX10-NEXT:    ds_write_b32 v0, v1 offset:456
203; GFX10-NEXT:    s_endpgm
204  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
205  %neg = sub i32 0, %x.i
206  %shl = shl i32 %neg, 2
207  %add0 = add i32 123, %shl
208  %add1 = add i32 456, %shl
209  %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
210  store volatile i32 13, i32 addrspace(3)* %ptr0
211  %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
212  store volatile i32 13, i32 addrspace(3)* %ptr1
213  ret void
214}
215
; Two volatile i32 stores of 13 through the same pointer,
; 123 + ((0 - workitem.id.x) << 2), so the add itself has two users.
; The checks expect one shift and subtract-from-0 followed by two
; ds_write_b32 instructions that both carry offset 123.
216define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
217; CI-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
218; CI:       ; %bb.0:
219; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
220; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
221; CI-NEXT:    v_mov_b32_e32 v1, 13
222; CI-NEXT:    s_mov_b32 m0, -1
223; CI-NEXT:    ds_write_b32 v0, v1 offset:123
224; CI-NEXT:    ds_write_b32 v0, v1 offset:123
225; CI-NEXT:    s_endpgm
226;
227; GFX9-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
228; GFX9:       ; %bb.0:
229; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
230; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
231; GFX9-NEXT:    v_mov_b32_e32 v1, 13
232; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
233; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
234; GFX9-NEXT:    s_endpgm
235;
236; GFX10-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
237; GFX10:       ; %bb.0:
238; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
239; GFX10-NEXT:    v_mov_b32_e32 v1, 13
240; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
241; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
242; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
243; GFX10-NEXT:    s_endpgm
244  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
245  %neg = sub i32 0, %x.i
246  %shl = shl i32 %neg, 2
247  %add = add i32 123, %shl
248  %ptr = inttoptr i32 %add to i32 addrspace(3)*
249  store volatile i32 13, i32 addrspace(3)* %ptr
250  store volatile i32 13, i32 addrspace(3)* %ptr
251  ret void
252}
253
; i64 store of 123 with only 4-byte alignment to the address
; 1019 + ((0 - workitem.id.x) << 2).
; The checks expect the store to be emitted as one ds_write2_b32 (low dword
; 0x7b, high dword 0, offset1:1) and the constant 1019 (0x3fb) to appear as
; the subtract operand rather than in an instruction offset field.
254define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
255; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
256; CI:       ; %bb.0:
257; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
258; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
259; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
260; CI-NEXT:    v_mov_b32_e32 v2, 0
261; CI-NEXT:    s_mov_b32 m0, -1
262; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
263; CI-NEXT:    s_endpgm
264;
265; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
266; GFX9:       ; %bb.0:
267; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
268; GFX9-NEXT:    v_sub_u32_e32 v0, 0x3fb, v0
269; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
270; GFX9-NEXT:    v_mov_b32_e32 v2, 0
271; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
272; GFX9-NEXT:    s_endpgm
273;
274; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
275; GFX10:       ; %bb.0:
276; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
277; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
278; GFX10-NEXT:    v_mov_b32_e32 v2, 0
279; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x3fb, v0
280; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
281; GFX10-NEXT:    s_endpgm
282  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
283  %neg = sub i32 0, %x.i
284  %shl = shl i32 %neg, 2
285  %add = add i32 1019, %shl
286  %ptr = inttoptr i32 %add to i64 addrspace(3)*
287  store i64 123, i64 addrspace(3)* %ptr, align 4
288  ret void
289}
290
; Same 4-byte-aligned i64 store as
; add_x_shl_neg_to_sub_misaligned_i64_max_offset (address
; 1019 + ((0 - workitem.id.x) << 2), checked as ds_write2_b32 offset1:1 with
; 0x3fb as the subtract operand), followed by a call to
; llvm.amdgcn.div.fmas.f32 on %dummy.val with an i1 false condition whose
; result is stored (volatile) to the null global address.
; In the checks for bonaire the subtract names vcc as its carry output, and
; vcc is then set to 0 before v_div_fmas_f32.
; NOTE(review): the "_clamp_bit" suffix is not explained by the visible IR;
; presumably it guards how the subtract is selected while vcc is live -
; confirm against the change that introduced the test.
291define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
292; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
293; CI:       ; %bb.0:
294; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
295; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
296; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
297; CI-NEXT:    s_mov_b64 vcc, 0
298; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
299; CI-NEXT:    s_waitcnt lgkmcnt(0)
300; CI-NEXT:    v_mov_b32_e32 v1, s0
301; CI-NEXT:    s_mov_b32 s0, 0
302; CI-NEXT:    v_div_fmas_f32 v1, v1, v1, v1
303; CI-NEXT:    v_mov_b32_e32 v3, 0
304; CI-NEXT:    s_mov_b32 m0, -1
305; CI-NEXT:    s_mov_b32 s3, 0xf000
306; CI-NEXT:    s_mov_b32 s2, -1
307; CI-NEXT:    s_mov_b32 s1, s0
308; CI-NEXT:    ds_write2_b32 v0, v2, v3 offset1:1
309; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
310; CI-NEXT:    s_waitcnt vmcnt(0)
311; CI-NEXT:    s_endpgm
312;
313; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
314; GFX9:       ; %bb.0:
315; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
316; GFX9-NEXT:    s_mov_b64 vcc, 0
317; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
318; GFX9-NEXT:    v_sub_u32_e32 v3, 0x3fb, v0
319; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7b
320; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
321; GFX9-NEXT:    v_mov_b32_e32 v1, s0
322; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1
323; GFX9-NEXT:    v_mov_b32_e32 v0, 0
324; GFX9-NEXT:    v_mov_b32_e32 v5, 0
325; GFX9-NEXT:    v_mov_b32_e32 v1, 0
326; GFX9-NEXT:    ds_write2_b32 v3, v4, v5 offset1:1
327; GFX9-NEXT:    global_store_dword v[0:1], v2, off
328; GFX9-NEXT:    s_waitcnt vmcnt(0)
329; GFX9-NEXT:    s_endpgm
330;
331; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
332; GFX10:       ; %bb.0:
333; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x24
334; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
335; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
336; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
337; GFX10-NEXT:    v_mov_b32_e32 v4, 0
338; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0x3fb, v0
339; GFX10-NEXT:    v_mov_b32_e32 v0, 0
340; GFX10-NEXT:    v_mov_b32_e32 v1, 0
341; GFX10-NEXT:    ds_write2_b32 v2, v3, v4 offset1:1
342; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
343; GFX10-NEXT:    v_div_fmas_f32 v5, s0, s0, s0
344; GFX10-NEXT:    global_store_dword v[0:1], v5, off
345; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
346; GFX10-NEXT:    s_endpgm
347  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
348  %neg = sub i32 0, %x.i
349  %shl = shl i32 %neg, 2
350  %add = add i32 1019, %shl
351  %ptr = inttoptr i32 %add to i64 addrspace(3)*
352  store i64 123, i64 addrspace(3)* %ptr, align 4
353  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
354  store volatile float %fmas, float addrspace(1)* null
355  ret void
356}
357
; 4-byte-aligned i64 store of 123 to the address
; 1020 + ((0 - workitem.id.x) << 2), i.e. the constant is one larger than in
; add_x_shl_neg_to_sub_misaligned_i64_max_offset.
; The checks expect the constant 1020 (0x3fc) as the subtract operand and the
; store emitted as ds_write2_b32 with offset1:1.
358define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
359; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
360; CI:       ; %bb.0:
361; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
362; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fc, v0
363; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
364; CI-NEXT:    v_mov_b32_e32 v2, 0
365; CI-NEXT:    s_mov_b32 m0, -1
366; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
367; CI-NEXT:    s_endpgm
368;
369; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
370; GFX9:       ; %bb.0:
371; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
372; GFX9-NEXT:    v_sub_u32_e32 v0, 0x3fc, v0
373; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
374; GFX9-NEXT:    v_mov_b32_e32 v2, 0
375; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
376; GFX9-NEXT:    s_endpgm
377;
378; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
379; GFX10:       ; %bb.0:
380; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
381; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
382; GFX10-NEXT:    v_mov_b32_e32 v2, 0
383; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x3fc, v0
384; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
385; GFX10-NEXT:    s_endpgm
386  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
387  %neg = sub i32 0, %x.i
388  %shl = shl i32 %neg, 2
389  %add = add i32 1020, %shl
390  %ptr = inttoptr i32 %add to i64 addrspace(3)*
391  store i64 123, i64 addrspace(3)* %ptr, align 4
392  ret void
393}
394
; Intrinsic called by the two *_clamp_bit kernels; the last operand is the
; i1 condition, which both callers pass as false.
395declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)
396
; Attribute groups referenced by the definitions and call sites in this file.
397attributes #0 = { nounwind readnone }
398attributes #1 = { nounwind }
; NOTE(review): no reference to #2 is visible in this file - confirm whether
; it is still needed.
399attributes #2 = { nounwind convergent }
400