1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
4; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
5
; Loads a <4 x i32> through an addrspace(3) pointer with no explicit `align`
; on the load, and returns the loaded vector.
; Expected code per RUN line:
;   -mcpu=gfx900 : a single ds_read_b128.
;   -mcpu=hawaii : m0 is set to -1, then a single ds_read_b128.
;   -mcpu=tahiti : m0 is set to -1, then two ds_read_b64 (the second address
;                  is formed with v_add_i32, +8).
; The expected output is the same as for @load_lds_v4i32_align16.
; NOTE(review): presumably because an omitted `align` means the type's ABI
; alignment -- confirm against the LangRef.
6define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) {
7; GFX9-LABEL: load_lds_v4i32:
8; GFX9:       ; %bb.0:
9; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX9-NEXT:    ds_read_b128 v[0:3], v0
11; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
12; GFX9-NEXT:    s_setpc_b64 s[30:31]
13;
14; GFX7-LABEL: load_lds_v4i32:
15; GFX7:       ; %bb.0:
16; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17; GFX7-NEXT:    s_mov_b32 m0, -1
18; GFX7-NEXT:    ds_read_b128 v[0:3], v0
19; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20; GFX7-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX6-LABEL: load_lds_v4i32:
23; GFX6:       ; %bb.0:
24; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
26; GFX6-NEXT:    s_mov_b32 m0, -1
27; GFX6-NEXT:    ds_read_b64 v[2:3], v1
28; GFX6-NEXT:    ds_read_b64 v[0:1], v0
29; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
30; GFX6-NEXT:    s_setpc_b64 s[30:31]
31  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr
32  ret <4 x i32> %load
33}
34
; Loads a <4 x i32> through an addrspace(3) pointer with `align 1` and returns
; the loaded vector.
; Expected code per RUN line (all three split the load into sixteen
; ds_read_u8 and rebuild the four dwords in v0..v3):
;   -mcpu=gfx900 : byte reads use immediate offsets 0..15; bytes are merged
;                  with v_lshl_or_b32 (shift 8 for byte pairs, 16 for halves).
;   -mcpu=hawaii : m0 is set to -1; byte reads use immediate offsets and are
;                  issued in two groups of eight; bytes are merged with
;                  separate v_lshlrev_b32 / v_or_b32.
;   -mcpu=tahiti : m0 is set to -1; no immediate offsets appear -- each byte
;                  address other than the base is formed with its own
;                  v_add_i32 before the ds_read_u8.
35define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) {
36; GFX9-LABEL: load_lds_v4i32_align1:
37; GFX9:       ; %bb.0:
38; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX9-NEXT:    ds_read_u8 v1, v0
40; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
41; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
42; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
43; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
44; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
45; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
46; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
47; GFX9-NEXT:    ds_read_u8 v9, v0 offset:8
48; GFX9-NEXT:    ds_read_u8 v10, v0 offset:9
49; GFX9-NEXT:    ds_read_u8 v11, v0 offset:10
50; GFX9-NEXT:    ds_read_u8 v12, v0 offset:11
51; GFX9-NEXT:    ds_read_u8 v13, v0 offset:12
52; GFX9-NEXT:    ds_read_u8 v14, v0 offset:13
53; GFX9-NEXT:    ds_read_u8 v15, v0 offset:14
54; GFX9-NEXT:    ds_read_u8 v16, v0 offset:15
55; GFX9-NEXT:    s_waitcnt lgkmcnt(14)
56; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 8, v1
57; GFX9-NEXT:    s_waitcnt lgkmcnt(12)
58; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
59; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
60; GFX9-NEXT:    s_waitcnt lgkmcnt(10)
61; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
62; GFX9-NEXT:    s_waitcnt lgkmcnt(8)
63; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 8, v7
64; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
65; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
66; GFX9-NEXT:    v_lshl_or_b32 v2, v10, 8, v9
67; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
68; GFX9-NEXT:    v_lshl_or_b32 v3, v12, 8, v11
69; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
70; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
71; GFX9-NEXT:    v_lshl_or_b32 v3, v14, 8, v13
72; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX9-NEXT:    v_lshl_or_b32 v4, v16, 8, v15
74; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
75; GFX9-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX7-LABEL: load_lds_v4i32_align1:
78; GFX7:       ; %bb.0:
79; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX7-NEXT:    s_mov_b32 m0, -1
81; GFX7-NEXT:    ds_read_u8 v1, v0 offset:7
82; GFX7-NEXT:    ds_read_u8 v2, v0 offset:6
83; GFX7-NEXT:    ds_read_u8 v3, v0 offset:5
84; GFX7-NEXT:    ds_read_u8 v5, v0 offset:4
85; GFX7-NEXT:    ds_read_u8 v4, v0 offset:3
86; GFX7-NEXT:    ds_read_u8 v6, v0 offset:2
87; GFX7-NEXT:    ds_read_u8 v7, v0 offset:1
88; GFX7-NEXT:    ds_read_u8 v8, v0
89; GFX7-NEXT:    s_waitcnt lgkmcnt(7)
90; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
91; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
92; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
93; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
94; GFX7-NEXT:    v_or_b32_e32 v4, v4, v6
95; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
96; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
97; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
98; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
99; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
100; GFX7-NEXT:    v_or_b32_e32 v7, v7, v8
101; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
102; GFX7-NEXT:    v_or_b32_e32 v3, v3, v5
103; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
104; GFX7-NEXT:    v_or_b32_e32 v4, v4, v7
105; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
106; GFX7-NEXT:    ds_read_u8 v3, v0 offset:15
107; GFX7-NEXT:    ds_read_u8 v5, v0 offset:14
108; GFX7-NEXT:    ds_read_u8 v6, v0 offset:13
109; GFX7-NEXT:    ds_read_u8 v7, v0 offset:12
110; GFX7-NEXT:    ds_read_u8 v2, v0 offset:11
111; GFX7-NEXT:    ds_read_u8 v8, v0 offset:10
112; GFX7-NEXT:    ds_read_u8 v9, v0 offset:9
113; GFX7-NEXT:    ds_read_u8 v0, v0 offset:8
114; GFX7-NEXT:    s_waitcnt lgkmcnt(7)
115; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
116; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
117; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
118; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
119; GFX7-NEXT:    v_or_b32_e32 v2, v2, v8
120; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
121; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
122; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
123; GFX7-NEXT:    v_or_b32_e32 v0, v9, v0
124; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
125; GFX7-NEXT:    v_or_b32_e32 v3, v3, v5
126; GFX7-NEXT:    v_or_b32_e32 v2, v2, v0
127; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v6
128; GFX7-NEXT:    v_or_b32_e32 v0, v0, v7
129; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
130; GFX7-NEXT:    v_or_b32_e32 v3, v3, v0
131; GFX7-NEXT:    v_mov_b32_e32 v0, v4
132; GFX7-NEXT:    s_setpc_b64 s[30:31]
133;
134; GFX6-LABEL: load_lds_v4i32_align1:
135; GFX6:       ; %bb.0:
136; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 5, v0
138; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
139; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 7, v0
140; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 6, v0
141; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 9, v0
142; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 8, v0
143; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 11, v0
144; GFX6-NEXT:    s_mov_b32 m0, -1
145; GFX6-NEXT:    ds_read_u8 v2, v2
146; GFX6-NEXT:    ds_read_u8 v3, v3
147; GFX6-NEXT:    ds_read_u8 v4, v4
148; GFX6-NEXT:    ds_read_u8 v5, v5
149; GFX6-NEXT:    ds_read_u8 v6, v6
150; GFX6-NEXT:    ds_read_u8 v7, v7
151; GFX6-NEXT:    ds_read_u8 v1, v1
152; GFX6-NEXT:    ds_read_u8 v8, v0
153; GFX6-NEXT:    v_add_i32_e32 v9, vcc, 14, v0
154; GFX6-NEXT:    v_add_i32_e32 v10, vcc, 3, v0
155; GFX6-NEXT:    v_add_i32_e32 v11, vcc, 2, v0
156; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
157; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
158; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
159; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
160; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
161; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
162; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
163; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
164; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
165; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
166; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 10, v0
167; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 13, v0
168; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 12, v0
169; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 15, v0
170; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
171; GFX6-NEXT:    ds_read_u8 v4, v4
172; GFX6-NEXT:    ds_read_u8 v5, v5
173; GFX6-NEXT:    ds_read_u8 v6, v6
174; GFX6-NEXT:    ds_read_u8 v7, v7
175; GFX6-NEXT:    ds_read_u8 v9, v9
176; GFX6-NEXT:    ds_read_u8 v10, v10
177; GFX6-NEXT:    ds_read_u8 v11, v11
178; GFX6-NEXT:    ds_read_u8 v0, v0
179; GFX6-NEXT:    s_waitcnt lgkmcnt(7)
180; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
181; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
182; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 8, v7
183; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
184; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
185; GFX6-NEXT:    v_or_b32_e32 v4, v4, v9
186; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
187; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v5
188; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
189; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
190; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
191; GFX6-NEXT:    s_waitcnt lgkmcnt(2)
192; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 8, v10
193; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
194; GFX6-NEXT:    v_or_b32_e32 v4, v4, v11
195; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
196; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
197; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
198; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
199; GFX6-NEXT:    v_or_b32_e32 v0, v4, v0
200; GFX6-NEXT:    s_setpc_b64 s[30:31]
201  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1
202  ret <4 x i32> %load
203}
204
; Loads a <4 x i32> through an addrspace(3) pointer with `align 2` and returns
; the loaded vector.
; Expected code per RUN line (all three split the load into eight
; ds_read_u16 and rebuild the four dwords in v0..v3):
;   -mcpu=gfx900 : halfword reads use immediate offsets 0..14; each pair is
;                  merged with one v_lshl_or_b32 (shift 16).
;   -mcpu=hawaii : m0 is set to -1; halfword reads use immediate offsets;
;                  pairs are merged with v_lshlrev_b32 + v_or_b32.
;   -mcpu=tahiti : m0 is set to -1; no immediate offsets appear -- the seven
;                  non-base addresses are each formed with v_add_i32.
205define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) {
206; GFX9-LABEL: load_lds_v4i32_align2:
207; GFX9:       ; %bb.0:
208; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209; GFX9-NEXT:    ds_read_u16 v1, v0
210; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
211; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
212; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
213; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
214; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
215; GFX9-NEXT:    ds_read_u16 v7, v0 offset:12
216; GFX9-NEXT:    ds_read_u16 v8, v0 offset:14
217; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
218; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
219; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
220; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
221; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
222; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
223; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
224; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
225; GFX9-NEXT:    s_setpc_b64 s[30:31]
226;
227; GFX7-LABEL: load_lds_v4i32_align2:
228; GFX7:       ; %bb.0:
229; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230; GFX7-NEXT:    s_mov_b32 m0, -1
231; GFX7-NEXT:    ds_read_u16 v3, v0 offset:14
232; GFX7-NEXT:    ds_read_u16 v4, v0 offset:12
233; GFX7-NEXT:    ds_read_u16 v2, v0 offset:10
234; GFX7-NEXT:    ds_read_u16 v5, v0 offset:8
235; GFX7-NEXT:    ds_read_u16 v1, v0 offset:6
236; GFX7-NEXT:    ds_read_u16 v6, v0 offset:4
237; GFX7-NEXT:    ds_read_u16 v7, v0 offset:2
238; GFX7-NEXT:    ds_read_u16 v0, v0
239; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
240; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
241; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
242; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
243; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
244; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
245; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
246; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
247; GFX7-NEXT:    v_or_b32_e32 v0, v7, v0
248; GFX7-NEXT:    v_or_b32_e32 v1, v1, v6
249; GFX7-NEXT:    v_or_b32_e32 v2, v2, v5
250; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
251; GFX7-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX6-LABEL: load_lds_v4i32_align2:
254; GFX6:       ; %bb.0:
255; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 6, v0
257; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
258; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 10, v0
259; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 8, v0
260; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 14, v0
261; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 12, v0
262; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 2, v0
263; GFX6-NEXT:    s_mov_b32 m0, -1
264; GFX6-NEXT:    ds_read_u16 v2, v2
265; GFX6-NEXT:    ds_read_u16 v3, v3
266; GFX6-NEXT:    ds_read_u16 v4, v4
267; GFX6-NEXT:    ds_read_u16 v5, v5
268; GFX6-NEXT:    ds_read_u16 v6, v6
269; GFX6-NEXT:    ds_read_u16 v7, v7
270; GFX6-NEXT:    ds_read_u16 v1, v1
271; GFX6-NEXT:    ds_read_u16 v0, v0
272; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
273; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
274; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
275; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
276; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
277; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
278; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
279; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
280; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
281; GFX6-NEXT:    v_or_b32_e32 v0, v4, v0
282; GFX6-NEXT:    s_setpc_b64 s[30:31]
283  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 2
284  ret <4 x i32> %load
285}
286
; Loads a <4 x i32> through an addrspace(3) pointer with `align 4` and returns
; the loaded vector.
; Expected code per RUN line:
;   -mcpu=gfx900 : two ds_read2_b32 (offset pairs 0/1 and 2/3); the base
;                  address is first copied to v2 because the first read
;                  overwrites v0.
;   -mcpu=hawaii : same as gfx900, plus m0 is set to -1 before the reads.
;   -mcpu=tahiti : m0 is set to -1; four separate ds_read_b32, with the +4,
;                  +8 and +12 addresses formed by v_add_i32.
287define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) {
288; GFX9-LABEL: load_lds_v4i32_align4:
289; GFX9:       ; %bb.0:
290; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291; GFX9-NEXT:    v_mov_b32_e32 v2, v0
292; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
293; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
294; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
295; GFX9-NEXT:    s_setpc_b64 s[30:31]
296;
297; GFX7-LABEL: load_lds_v4i32_align4:
298; GFX7:       ; %bb.0:
299; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300; GFX7-NEXT:    v_mov_b32_e32 v2, v0
301; GFX7-NEXT:    s_mov_b32 m0, -1
302; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
303; GFX7-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
304; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
305; GFX7-NEXT:    s_setpc_b64 s[30:31]
306;
307; GFX6-LABEL: load_lds_v4i32_align4:
308; GFX6:       ; %bb.0:
309; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
311; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
312; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 12, v0
313; GFX6-NEXT:    s_mov_b32 m0, -1
314; GFX6-NEXT:    ds_read_b32 v2, v2
315; GFX6-NEXT:    ds_read_b32 v3, v3
316; GFX6-NEXT:    ds_read_b32 v1, v1
317; GFX6-NEXT:    ds_read_b32 v0, v0
318; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
319; GFX6-NEXT:    s_setpc_b64 s[30:31]
320  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4
321  ret <4 x i32> %load
322}
323
; Loads a <4 x i32> through an addrspace(3) pointer with `align 8` and returns
; the loaded vector.
; Expected code per RUN line:
;   -mcpu=gfx900 : a single ds_read2_b64 (offset1:1) into v[0:3].
;   -mcpu=hawaii : m0 is set to -1, then the same single ds_read2_b64.
;   -mcpu=tahiti : m0 is set to -1; two ds_read_b64, with the +8 address
;                  formed by v_add_i32.
324define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) {
325; GFX9-LABEL: load_lds_v4i32_align8:
326; GFX9:       ; %bb.0:
327; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX9-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
329; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
330; GFX9-NEXT:    s_setpc_b64 s[30:31]
331;
332; GFX7-LABEL: load_lds_v4i32_align8:
333; GFX7:       ; %bb.0:
334; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX7-NEXT:    s_mov_b32 m0, -1
336; GFX7-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
337; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
338; GFX7-NEXT:    s_setpc_b64 s[30:31]
339;
340; GFX6-LABEL: load_lds_v4i32_align8:
341; GFX6:       ; %bb.0:
342; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
344; GFX6-NEXT:    s_mov_b32 m0, -1
345; GFX6-NEXT:    ds_read_b64 v[2:3], v1
346; GFX6-NEXT:    ds_read_b64 v[0:1], v0
347; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
348; GFX6-NEXT:    s_setpc_b64 s[30:31]
349  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8
350  ret <4 x i32> %load
351}
352
; Loads a <4 x i32> through an addrspace(3) pointer with `align 16` and
; returns the loaded vector.
; Expected code per RUN line:
;   -mcpu=gfx900 : a single ds_read_b128 into v[0:3].
;   -mcpu=hawaii : m0 is set to -1, then a single ds_read_b128.
;   -mcpu=tahiti : m0 is set to -1; two ds_read_b64, with the +8 address
;                  formed by v_add_i32 (no 128-bit read appears in the
;                  expected output for this target).
353define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) {
354; GFX9-LABEL: load_lds_v4i32_align16:
355; GFX9:       ; %bb.0:
356; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; GFX9-NEXT:    ds_read_b128 v[0:3], v0
358; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
359; GFX9-NEXT:    s_setpc_b64 s[30:31]
360;
361; GFX7-LABEL: load_lds_v4i32_align16:
362; GFX7:       ; %bb.0:
363; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GFX7-NEXT:    s_mov_b32 m0, -1
365; GFX7-NEXT:    ds_read_b128 v[0:3], v0
366; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
367; GFX7-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX6-LABEL: load_lds_v4i32_align16:
370; GFX6:       ; %bb.0:
371; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
373; GFX6-NEXT:    s_mov_b32 m0, -1
374; GFX6-NEXT:    ds_read_b64 v[2:3], v1
375; GFX6-NEXT:    ds_read_b64 v[0:1], v0
376; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
377; GFX6-NEXT:    s_setpc_b64 s[30:31]
378  %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 16
379  ret <4 x i32> %load
380}
381