1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
5
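6; FIXME: The tahiti run line below is disabled (spelled XUN so that lit does not execute it); the reason is not recorded in this file.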
7; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
8
; Loads a <4 x i32> through an addrspace(3) pointer with no explicit alignment
; on the load and returns it.
; Expected code on all three checked targets is a single ds_read_b128 into
; v[0:3]. The hawaii output additionally writes -1 to m0 before the read, and
; the gfx1010 output has an extra s_waitcnt_vscnt at function entry.
; These expectations are the same as for the explicit "align 16" variant of
; this function.
; The check lines are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
9define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) {
10; GFX9-LABEL: load_lds_v4i32:
11; GFX9:       ; %bb.0:
12; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX9-NEXT:    ds_read_b128 v[0:3], v0
14; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
15; GFX9-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX7-LABEL: load_lds_v4i32:
18; GFX7:       ; %bb.0:
19; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX7-NEXT:    s_mov_b32 m0, -1
21; GFX7-NEXT:    ds_read_b128 v[0:3], v0
22; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
23; GFX7-NEXT:    s_setpc_b64 s[30:31]
24;
25; GFX10-LABEL: load_lds_v4i32:
26; GFX10:       ; %bb.0:
27; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
29; GFX10-NEXT:    ds_read_b128 v[0:3], v0
30; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
31; GFX10-NEXT:    s_setpc_b64 s[30:31]
32  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr
33  ret <4 x i32> %load
34}
35
; Loads a <4 x i32> through an addrspace(3) pointer with "align 1" and
; returns it.
; Expected code reads the value as sixteen single bytes (ds_read_u8 at
; offsets 0 through 15) and rebuilds each 32-bit element from four bytes
; using shifts by 8, 16 and 24 combined with ors:
;   - gfx900: two batches of eight byte reads; each element is assembled with
;     v_lshl_or_b32 plus v_or3_b32, and element 0 is built in v4 and copied
;     to v0 at the end.
;   - hawaii: the same two-batch structure, but each combine step is a
;     separate v_lshlrev_b32 followed by v_or_b32.
;   - gfx1010: all sixteen byte reads are issued first, then the four
;     elements are assembled with v_lshl_or_b32 and v_or3_b32.
; The s_waitcnt lgkmcnt(N) values count down as the byte reads complete.
; Review note: unlike the wide-load functions in this file, the hawaii
; expectations here contain no "s_mov_b32 m0, -1" before the ds_read
; instructions -- TODO confirm this is intended.
; The check lines are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
36define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) {
37; GFX9-LABEL: load_lds_v4i32_align1:
38; GFX9:       ; %bb.0:
39; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX9-NEXT:    ds_read_u8 v1, v0
41; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
42; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
43; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
44; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
45; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
46; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
47; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
48; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
49; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
50; GFX9-NEXT:    s_waitcnt lgkmcnt(5)
51; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
52; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
53; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
54; GFX9-NEXT:    v_or3_b32 v4, v1, v2, v3
55; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
56; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
57; GFX9-NEXT:    s_waitcnt lgkmcnt(1)
58; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
59; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
60; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v8
61; GFX9-NEXT:    v_or3_b32 v1, v1, v2, v3
62; GFX9-NEXT:    ds_read_u8 v2, v0 offset:8
63; GFX9-NEXT:    ds_read_u8 v3, v0 offset:9
64; GFX9-NEXT:    ds_read_u8 v5, v0 offset:10
65; GFX9-NEXT:    ds_read_u8 v6, v0 offset:11
66; GFX9-NEXT:    ds_read_u8 v7, v0 offset:12
67; GFX9-NEXT:    ds_read_u8 v8, v0 offset:13
68; GFX9-NEXT:    ds_read_u8 v9, v0 offset:14
69; GFX9-NEXT:    ds_read_u8 v0, v0 offset:15
70; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
71; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 8, v2
72; GFX9-NEXT:    s_waitcnt lgkmcnt(5)
73; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
74; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
75; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v6
76; GFX9-NEXT:    v_or3_b32 v2, v2, v3, v5
77; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
78; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 8, v7
79; GFX9-NEXT:    s_waitcnt lgkmcnt(1)
80; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 16, v9
81; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
82; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
83; GFX9-NEXT:    v_or3_b32 v3, v3, v5, v0
84; GFX9-NEXT:    v_mov_b32_e32 v0, v4
85; GFX9-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX7-LABEL: load_lds_v4i32_align1:
88; GFX7:       ; %bb.0:
89; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX7-NEXT:    ds_read_u8 v1, v0
91; GFX7-NEXT:    ds_read_u8 v2, v0 offset:1
92; GFX7-NEXT:    ds_read_u8 v3, v0 offset:2
93; GFX7-NEXT:    ds_read_u8 v4, v0 offset:3
94; GFX7-NEXT:    ds_read_u8 v5, v0 offset:4
95; GFX7-NEXT:    ds_read_u8 v6, v0 offset:5
96; GFX7-NEXT:    ds_read_u8 v7, v0 offset:6
97; GFX7-NEXT:    ds_read_u8 v8, v0 offset:7
98; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
99; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
100; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
101; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
102; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
103; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
104; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
105; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v4
106; GFX7-NEXT:    v_or_b32_e32 v4, v1, v2
107; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
108; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v6
109; GFX7-NEXT:    v_or_b32_e32 v1, v5, v1
110; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
111; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
112; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
113; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
114; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v8
115; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
116; GFX7-NEXT:    ds_read_u8 v2, v0 offset:8
117; GFX7-NEXT:    ds_read_u8 v3, v0 offset:9
118; GFX7-NEXT:    ds_read_u8 v5, v0 offset:10
119; GFX7-NEXT:    ds_read_u8 v6, v0 offset:11
120; GFX7-NEXT:    ds_read_u8 v7, v0 offset:12
121; GFX7-NEXT:    ds_read_u8 v8, v0 offset:13
122; GFX7-NEXT:    ds_read_u8 v9, v0 offset:14
123; GFX7-NEXT:    ds_read_u8 v0, v0 offset:15
124; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
125; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
126; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
127; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
128; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
129; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
130; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
131; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v6
132; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
133; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
134; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v8
135; GFX7-NEXT:    v_or_b32_e32 v3, v7, v3
136; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
137; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v9
138; GFX7-NEXT:    v_or_b32_e32 v3, v3, v5
139; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
140; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
141; GFX7-NEXT:    v_or_b32_e32 v3, v3, v0
142; GFX7-NEXT:    v_mov_b32_e32 v0, v4
143; GFX7-NEXT:    s_setpc_b64 s[30:31]
144;
145; GFX10-LABEL: load_lds_v4i32_align1:
146; GFX10:       ; %bb.0:
147; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
149; GFX10-NEXT:    ds_read_u8 v1, v0
150; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
151; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
152; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
153; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
154; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
155; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
156; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
157; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
158; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
159; GFX10-NEXT:    ds_read_u8 v11, v0 offset:10
160; GFX10-NEXT:    ds_read_u8 v12, v0 offset:11
161; GFX10-NEXT:    ds_read_u8 v13, v0 offset:12
162; GFX10-NEXT:    ds_read_u8 v14, v0 offset:13
163; GFX10-NEXT:    ds_read_u8 v15, v0 offset:14
164; GFX10-NEXT:    ds_read_u8 v0, v0 offset:15
165; GFX10-NEXT:    s_waitcnt lgkmcnt(14)
166; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
167; GFX10-NEXT:    s_waitcnt lgkmcnt(13)
168; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
169; GFX10-NEXT:    s_waitcnt lgkmcnt(12)
170; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
171; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
172; GFX10-NEXT:    v_lshl_or_b32 v4, v6, 8, v5
173; GFX10-NEXT:    s_waitcnt lgkmcnt(9)
174; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
175; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
176; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 24, v8
177; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
178; GFX10-NEXT:    v_lshl_or_b32 v7, v10, 8, v9
179; GFX10-NEXT:    s_waitcnt lgkmcnt(5)
180; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
181; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
182; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 24, v12
183; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
184; GFX10-NEXT:    v_lshl_or_b32 v10, v14, 8, v13
185; GFX10-NEXT:    s_waitcnt lgkmcnt(1)
186; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v15
187; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
188; GFX10-NEXT:    v_lshlrev_b32_e32 v12, 24, v0
189; GFX10-NEXT:    v_or3_b32 v0, v1, v2, v3
190; GFX10-NEXT:    v_or3_b32 v1, v4, v5, v6
191; GFX10-NEXT:    v_or3_b32 v2, v7, v8, v9
192; GFX10-NEXT:    v_or3_b32 v3, v10, v11, v12
193; GFX10-NEXT:    s_setpc_b64 s[30:31]
194  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1
195  ret <4 x i32> %load
196}
197
; Loads a <4 x i32> through an addrspace(3) pointer with "align 2" and
; returns it.
; Expected code reads the value as eight 16-bit halves (ds_read_u16 at
; offsets 0, 2, ..., 14) and merges each pair into one 32-bit element by
; shifting the upper half left by 16 and or-ing in the lower half:
;   - gfx900 and gfx1010: one v_lshl_or_b32 per element.
;   - hawaii: v_lshlrev_b32 followed by v_or_b32 per element.
; Each s_waitcnt lgkmcnt(N) releases the next pair of halves (6, 4, 2, 0).
; Review note: unlike the wide-load functions in this file, the hawaii
; expectations here contain no "s_mov_b32 m0, -1" before the ds_read
; instructions -- TODO confirm this is intended.
; The check lines are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
198define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) {
199; GFX9-LABEL: load_lds_v4i32_align2:
200; GFX9:       ; %bb.0:
201; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202; GFX9-NEXT:    ds_read_u16 v1, v0
203; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
204; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
205; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
206; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
207; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
208; GFX9-NEXT:    ds_read_u16 v7, v0 offset:12
209; GFX9-NEXT:    ds_read_u16 v8, v0 offset:14
210; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
211; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
212; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
213; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
214; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
215; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
216; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
217; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
218; GFX9-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX7-LABEL: load_lds_v4i32_align2:
221; GFX7:       ; %bb.0:
222; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; GFX7-NEXT:    ds_read_u16 v1, v0
224; GFX7-NEXT:    ds_read_u16 v2, v0 offset:2
225; GFX7-NEXT:    ds_read_u16 v3, v0 offset:4
226; GFX7-NEXT:    ds_read_u16 v4, v0 offset:6
227; GFX7-NEXT:    ds_read_u16 v5, v0 offset:8
228; GFX7-NEXT:    ds_read_u16 v6, v0 offset:10
229; GFX7-NEXT:    ds_read_u16 v7, v0 offset:12
230; GFX7-NEXT:    ds_read_u16 v8, v0 offset:14
231; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
232; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
233; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
234; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
235; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v4
236; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
237; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
238; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
239; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
240; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v8
241; GFX7-NEXT:    v_or_b32_e32 v2, v5, v2
242; GFX7-NEXT:    v_or_b32_e32 v3, v7, v3
243; GFX7-NEXT:    s_setpc_b64 s[30:31]
244;
245; GFX10-LABEL: load_lds_v4i32_align2:
246; GFX10:       ; %bb.0:
247; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
249; GFX10-NEXT:    ds_read_u16 v1, v0
250; GFX10-NEXT:    ds_read_u16 v2, v0 offset:2
251; GFX10-NEXT:    ds_read_u16 v3, v0 offset:4
252; GFX10-NEXT:    ds_read_u16 v4, v0 offset:6
253; GFX10-NEXT:    ds_read_u16 v5, v0 offset:8
254; GFX10-NEXT:    ds_read_u16 v6, v0 offset:10
255; GFX10-NEXT:    ds_read_u16 v7, v0 offset:12
256; GFX10-NEXT:    ds_read_u16 v8, v0 offset:14
257; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
258; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
259; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
260; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
261; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
262; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
263; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
264; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
265; GFX10-NEXT:    s_setpc_b64 s[30:31]
266  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 2
267  ret <4 x i32> %load
268}
269
; Loads a <4 x i32> through an addrspace(3) pointer with "align 4" and
; returns it.
; Expected code on all three checked targets is two ds_read2_b32
; instructions: the first fills v[0:1] (offset1:1) and the second fills
; v[2:3] (offset0:2 offset1:3). The address in v0 is copied to v2 beforehand
; and the second read uses v2 as its address, apparently because the first
; read's destination v[0:1] overlaps v0.
; The hawaii output additionally writes -1 to m0 before the reads, and the
; gfx1010 output has an extra s_waitcnt_vscnt at function entry.
; The check lines are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
270define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) {
271; GFX9-LABEL: load_lds_v4i32_align4:
272; GFX9:       ; %bb.0:
273; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GFX9-NEXT:    v_mov_b32_e32 v2, v0
275; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
276; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
277; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
278; GFX9-NEXT:    s_setpc_b64 s[30:31]
279;
280; GFX7-LABEL: load_lds_v4i32_align4:
281; GFX7:       ; %bb.0:
282; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; GFX7-NEXT:    v_mov_b32_e32 v2, v0
284; GFX7-NEXT:    s_mov_b32 m0, -1
285; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
286; GFX7-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
287; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
288; GFX7-NEXT:    s_setpc_b64 s[30:31]
289;
290; GFX10-LABEL: load_lds_v4i32_align4:
291; GFX10:       ; %bb.0:
292; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
294; GFX10-NEXT:    v_mov_b32_e32 v2, v0
295; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
296; GFX10-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
297; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
298; GFX10-NEXT:    s_setpc_b64 s[30:31]
299  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4
300  ret <4 x i32> %load
301}
302
; Loads a <4 x i32> through an addrspace(3) pointer with "align 8" and
; returns it.
; Expected code on all three checked targets is a single ds_read2_b64 with
; offset1:1 that fills v[0:3].
; The hawaii output additionally writes -1 to m0 before the read, and the
; gfx1010 output has an extra s_waitcnt_vscnt at function entry.
; The check lines are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
303define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) {
304; GFX9-LABEL: load_lds_v4i32_align8:
305; GFX9:       ; %bb.0:
306; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307; GFX9-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
308; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
309; GFX9-NEXT:    s_setpc_b64 s[30:31]
310;
311; GFX7-LABEL: load_lds_v4i32_align8:
312; GFX7:       ; %bb.0:
313; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; GFX7-NEXT:    s_mov_b32 m0, -1
315; GFX7-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
316; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
317; GFX7-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX10-LABEL: load_lds_v4i32_align8:
320; GFX10:       ; %bb.0:
321; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
323; GFX10-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
324; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
325; GFX10-NEXT:    s_setpc_b64 s[30:31]
326  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8
327  ret <4 x i32> %load
328}
329
; Loads a <4 x i32> through an addrspace(3) pointer with "align 16" and
; returns it.
; Expected code on all three checked targets is a single ds_read_b128 into
; v[0:3].
; The hawaii output additionally writes -1 to m0 before the read, and the
; gfx1010 output has an extra s_waitcnt_vscnt at function entry.
; The check lines are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
330define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) {
331; GFX9-LABEL: load_lds_v4i32_align16:
332; GFX9:       ; %bb.0:
333; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334; GFX9-NEXT:    ds_read_b128 v[0:3], v0
335; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
336; GFX9-NEXT:    s_setpc_b64 s[30:31]
337;
338; GFX7-LABEL: load_lds_v4i32_align16:
339; GFX7:       ; %bb.0:
340; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341; GFX7-NEXT:    s_mov_b32 m0, -1
342; GFX7-NEXT:    ds_read_b128 v[0:3], v0
343; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
344; GFX7-NEXT:    s_setpc_b64 s[30:31]
345;
346; GFX10-LABEL: load_lds_v4i32_align16:
347; GFX10:       ; %bb.0:
348; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
350; GFX10-NEXT:    ds_read_b128 v[0:3], v0
351; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
352; GFX10-NEXT:    s_setpc_b64 s[30:31]
353  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 16
354  ret <4 x i32> %load
355}
356