; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Test gfx9+ s_shl[1-4]_add_u32 pattern matching

define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl1_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl1_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl1_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl2_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl2_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl3_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl3_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl3_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl4_add_u32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl4_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl4_add_u32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

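; A shift amount of 5 is outside the [1-4] range covered by s_lshlN_add_u32,
; so the shift and add stay separate on all targets.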
define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl5_add_u32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 5
; GCN-NEXT:    s_add_i32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl1_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl1_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl1_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl2_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl2_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl2_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl3_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl3_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl3_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 3, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl4_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl4_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl4_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 4, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  ret i32 %add
}

define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) {
; GFX9-LABEL: v_shl5_add_u32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 5, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl5_add_u32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl5_add_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 5, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  ret i32 %add
}

; FIXME: Use v_lshl_add_u32
; shift is scalar, but add is vector.
define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl1_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl1_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl1_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl2_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl2_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl2_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl3_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl3_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl3_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl4_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl4_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl4_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) {
; GFX9-LABEL: shl5_add_u32_vgpr1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 5
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: shl5_add_u32_vgpr1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 5
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: shl5_add_u32_vgpr1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl_b32 s0, s0, 5
; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 5
  %add = add i32 %shl, %src1
  %cast = bitcast i32 %add to float
  ret float %cast
}

define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl1_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl1_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl1_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl1_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl1_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl1_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl1_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 1, i32 1>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl2_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl2_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl2_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_lshl_b32 s1, s1, 2
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl2_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl2_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 2>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl3_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl3_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl3_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl3_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 3
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl3_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl3_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl3_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 3, i32 3>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl4_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl4_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl4_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl4_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl4_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 4, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GFX9-LABEL: s_shl_2_4_add_u32_v2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX9-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_shl_2_4_add_u32_v2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
; GFX8-NEXT:    s_lshl_b32 s1, s1, 4
; GFX8-NEXT:    s_add_i32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_shl_2_4_add_u32_v2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshl2_add_u32 s0, s0, s2
; GFX10-NEXT:    s_lshl4_add_u32 s1, s1, s3
; GFX10-NEXT:    ; return to shader part epilog
  %shl = shl <2 x i32> %src0, <i32 2, i32 4>
  %add = add <2 x i32> %shl, %src1
  ret <2 x i32> %add
}

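; The shift result has a second use (it is returned alongside the add),
; so folding it into s_lshlN_add_u32 would not eliminate the shift and the
; combine is not applied.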
define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl4_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 4
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 4
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl3_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 3
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 3
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}

define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl2_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 2
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 2
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}


define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_shl1_add_u32_multi_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b32 s0, s0, 1
; GCN-NEXT:    s_add_i32 s1, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %shl = shl i32 %src0, 1
  %add = add i32 %shl, %src1
  %insert0 = insertvalue { i32, i32 } undef, i32 %shl, 0
  %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1
  ret { i32, i32 } %insert1
}
